Example no. 1
    def run(self):
        if __debug__:
            logging_info = {
                'mod': 'ReceiveFirstPhaseCommitMessageAction',
                'endpoint_string': self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'Start receive first phase commit message ' + str(self.event_uuid)
            util.get_logger().info(log_msg,extra=logging_info)

        
        act_event = self.local_endpoint._act_event_map.get_event(
            self.event_uuid)


        if act_event != None:
            if self.successful:
                act_event.receive_successful_first_phase_commit_msg(
                    self.event_uuid,self.msg_originator_endpoint_uuid,
                    self.children_event_endpoint_uuids)
            else:
                act_event.receive_unsuccessful_first_phase_commit_msg(
                    self.event_uuid,self.msg_originator_endpoint_uuid)
            

        if __debug__:
            log_msg = 'End receive first phase commit message ' + str(self.event_uuid)
            util.get_logger().info(log_msg,extra=logging_info)
Example no. 2
    def run(self):
        if __debug__:
            logging_info = {
                'mod': 'ReceiveSubscriberAction',
                'endpoint_string': self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'Start receive subscriber for ' + str(self.event_uuid)
            util.get_logger().info(log_msg,extra=logging_info)

        
        evt = self.local_endpoint._act_event_map.get_event(self.event_uuid)
        
        if evt == None:
            # the event was already backed out or committed.  Do not
            # need to keep forwarding info about it.
            return
        
        if self.removed:
            evt.notify_removed_subscriber(
                self.subscriber_event_uuid,self.host_uuid,
                self.resource_uuid)
        else:
            evt.notify_additional_subscriber(
                self.subscriber_event_uuid,self.host_uuid,
                self.resource_uuid)

        if __debug__:
            log_msg = 'End receive subscriber for ' + evt.str_uuid
            util.get_logger().info(log_msg,extra=logging_info)
Example no. 3
    def run(self):
        if __debug__:
            logging_info = {
                'mod': 'ReceiveEndpointCallAction',
                'endpoint_string': self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'Start receive endpoint call action ' + str(self.event_uuid)
            util.get_logger().info(log_msg,extra=logging_info)

        
        act_evt_map = self.local_endpoint._act_event_map
        act_event = act_evt_map.get_or_create_endpoint_called_event(
            self.endpoint_making_call,self.event_uuid,self.result_queue)
        
        import waldoVariableStore
        evt_ctx = waldoExecutingEvent._ExecutingEventContext(
            self.local_endpoint._global_var_store,
            # should not have any sequence local data from an endpoint
            # call.
            waldoVariableStore._VariableStore(
                self.local_endpoint._host_uuid) )
        # receiving endpoint must know that this call was an endpoint
        # call.  This is so that it can ensure to make deep copies of
        # all non-external arguments (including lists,maps, and user
        # structs).
        evt_ctx.set_from_endpoint_true()
        exec_event = waldoExecutingEvent._ExecutingEvent(
            self.to_exec,act_event,evt_ctx,self.result_queue,
            *self.args)

        exec_event.start()
        
        if __debug__:
            log_msg = 'End receive endpoint call action ' + act_event.str_uuid
            util.get_logger().info(log_msg,extra=logging_info)
Example no. 4
    def __init__(self, *args, **kwargs):
        super(Gauge, self).__init__(*args, **kwargs)
        sysprefix = get_sys_prefix()
        self.config_file = os.getenv(
            'GAUGE_CONFIG', sysprefix + '/etc/ryu/faucet/gauge.yaml')
        self.exc_logfile = os.getenv(
            'GAUGE_EXCEPTION_LOG',
            sysprefix + '/var/log/ryu/faucet/gauge_exception.log')
        self.logfile = os.getenv(
            'GAUGE_LOG', sysprefix + '/var/log/ryu/faucet/gauge.log')

        # Setup logging
        self.logger = get_logger(
            self.logname, self.logfile, logging.DEBUG, 0)
        # Set up separate logging for exceptions
        self.exc_logger = get_logger(
            self.exc_logname, self.exc_logfile, logging.CRITICAL, 1)

        # Set the signal handler for reloading config file
        signal.signal(signal.SIGHUP, self.signal_handler)

        # dict of watchers/handlers:
        # indexed by dp_id and then by name
        self.watchers = {}
        confs = watcher_parser(self.config_file, self.logname)
        for conf in confs:
            watcher = watcher_factory(conf)(conf, self.logname)
            self.watchers.setdefault(watcher.dp.dp_id, {})
            self.watchers[watcher.dp.dp_id][watcher.conf.type] = watcher
        # Create dpset object for querying Ryu's DPSet application
        self.dpset = kwargs['dpset']
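The four-argument get_logger(name, logfile, level, propagate) helper called above is not included in this listing. A minimal sketch consistent with these call sites (an assumption, not the actual Faucet/Gauge helper) could look like:

import logging

def get_logger(logname, logfile, loglevel, propagate):
    # Hypothetical file-backed logger matching the calls above.
    logger = logging.getLogger(logname)
    handler = logging.FileHandler(logfile)
    handler.setFormatter(
        logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s'))
    logger.addHandler(handler)
    logger.setLevel(loglevel)
    logger.propagate = bool(propagate)  # 0/1 flag in the calls above
    return logger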
Example no. 5
    def service(self):
        if __debug__:
            logging_info = {
                'mod': 'ReceiveRequestBackoutAction',
                'endpoint_string': self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'Start receive request backout action ' + str(self.uuid)
            util.get_logger().info(log_msg,extra=logging_info)

        
        evt = self.local_endpoint._act_event_map.get_and_remove_event(
            self.uuid)

        if evt == None:
            # could happen for instance if there are loops in the endpoint
            # call graph.  In this case, we might get more than one
            # request to back out an event.  However, the first backout
            # has already removed the active event from the active
            # event map.
            return

        skip_partner = False
        if self.requesting_endpoint == util.PARTNER_ENDPOINT_SENTINEL:
            skip_partner = True

        # FIXME: should probably be in a separate thread
        evt.forward_backout_request_and_backout_self(skip_partner)

        if __debug__:
            log_msg = 'End receive request backout action ' + evt.str_uuid
            util.get_logger().info(log_msg,extra=logging_info)
Example no. 6
    def run(self):
        if __debug__:
            logging_info = {
                'mod': 'ReceiveRequestCommitAction',
                'endpoint_string':  self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'Start receive request commit action ' + str(self.event_uuid)
            util.get_logger().info(log_msg,extra=logging_info)

        
        evt = self.local_endpoint._act_event_map.get_event(self.event_uuid)
        
        if evt == None:
            # can happen if commit is requested and then
            #  a ---> b ---> c
            # 
            #     a asks for commit.  b backs out and forwards commit
            #     request on to c.  c waits on active event map lock
            #     before receiving request for commit.  a tells b to back
            #     out and forwards the request to b to backout, which
            #     forwards the request on to c.  Then, if c reads the
            #     backout before the request to commit, we may get to this
            #     point.  Just ignore the request.
            pass
        else:
            evt.forward_commit_request_and_try_holding_commit_on_myself(
                self.from_partner)        

        if __debug__:
            logging_info = {
                'mod': 'ReceiveRequestCommitAction',
                'endpoint_string':  self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'End receive request commit action ' + str(self.event_uuid)
            util.get_logger().info(log_msg,extra=logging_info)
Example no. 7
    def __init__(self, *args, **kwargs):
        super(Faucet, self).__init__(*args, **kwargs)

        # There doesn't seem to be a sensible method of getting command-line
        # options into Ryu apps. Instead I am using the environment variable
        # FAUCET_CONFIG to allow this to be set; if it is not set, it will
        # default to valve.yaml.
        sysprefix = get_sys_prefix()
        self.config_file = os.getenv(
            'FAUCET_CONFIG', sysprefix + '/etc/ryu/faucet/faucet.yaml')
        self.logfile = os.getenv(
            'FAUCET_LOG', sysprefix + '/var/log/ryu/faucet/faucet.log')
        self.exc_logfile = os.getenv(
            'FAUCET_EXCEPTION_LOG',
            sysprefix + '/var/log/ryu/faucet/faucet_exception.log')

        # Set the signal handler for reloading config file
        signal.signal(signal.SIGHUP, self.signal_handler)

        # Create dpset object for querying Ryu's DPSet application
        self.dpset = kwargs['dpset']

        # Setup logging
        self.logger = get_logger(
            self.logname, self.logfile, logging.DEBUG, 0)
        # Set up separate logging for exceptions
        self.exc_logger = get_logger(
            self.exc_logname, self.exc_logfile, logging.DEBUG, 1)

        # Set up a valve object for each datapath
        self.valves = {}
        self.config_hashes, valve_dps = dp_parser(
            self.config_file, self.logname)
        for valve_dp in valve_dps:
            # pylint: disable=no-member
            valve = valve_factory(valve_dp)
            if valve is None:
                self.logger.error(
                    'Hardware type not supported for DP: %s', valve_dp.name)
            else:
                self.valves[valve_dp.dp_id] = valve(valve_dp, self.logname)

        self.gateway_resolve_request_thread = hub.spawn(
            self.gateway_resolve_request)
        self.host_expire_request_thread = hub.spawn(
            self.host_expire_request)

        self.dp_bgp_speakers = {}
        self._reset_bgp()

        # Register to API
        api = kwargs['faucet_api']
        api._register(self)
        self.send_event_to_observers(EventFaucetAPIRegistered())
Example no. 8
def set_logging_level(level):
    '''
    Set the level of logging the programmer desires.  Note: mostly used
    internally for compiler development.

    Args:
      level (int): See Python's standard logging module.  Options are
        logging.CRITICAL, logging.INFO, logging.DEBUG, etc.
    '''

    util.get_logger().setLevel(level)
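For instance, a caller could switch the compiler's logger to verbose output (a hypothetical usage of the function above):

import logging

set_logging_level(logging.DEBUG)  # or logging.INFO, logging.CRITICAL, ...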
Example no. 9
 def run(self):
     '''
     Event loop.  Keep on reading off queue and servicing.
     '''
     while True:
         service_action = self.threadsafe_queue.get()
         if __debug__:
             util.get_logger().debug(
                 ('Servicing action.  Remaining queue size: %s' %
                  str(self.threadsafe_queue.qsize())),
                 extra= self.logging_info)
         service_action.service()
Example no. 10
    def run(self):
        if __debug__:
            logging_info = {
                'mod': 'ReceivePartnerReadyAction',
                'endpoint_string': self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'Start receive ready action '
            util.get_logger().info(log_msg,extra=logging_info)

        self.local_endpoint._other_side_ready()

        if __debug__:
            log_msg = 'End receive ready action '
            util.get_logger().info(log_msg,extra=logging_info)
Example no. 11
 def loop_error_item(self, loggers=['validation']):
     for meta, unit_datas in self._raw:
         error_datas = []
         for row_data in unit_datas:
             error_count = 0
             for logger in loggers:
                 errors = get_logger(logger, row_data)
                 error_count += len(errors) if errors else 0
                 for col_data in itertools.chain(row_data['raw'], row_data['eval'], row_data['extend']):
                     errors = get_logger(logger, col_data)
                     error_count += len(errors) if errors else 0
             if error_count>0:
                 error_datas.append(row_data)
         if len(error_datas)>0:
             yield meta, error_datas
Example no. 12
 def str_item_error(self, row_data, loggers=['validation'], html=False):
     link_str = '<br>' if html else '\n'
     msg = u'错误信息:'
     for logger in loggers:
         msg += link_str
         msg += u'--- 错误类型:{0} ---'.format(logger)
         errors = get_logger(logger, row_data)
         if errors and len(errors)>0:
             msg += link_str
             msg += ';'.join(errors)
         for col_data in itertools.chain(row_data['raw'], row_data['eval'], row_data['extend']):
             errors = get_logger(logger, col_data)
             if errors and len(errors)>0:
                 msg += link_str
                 msg += '%s: %s' % (col_data['key'], ';'.join(errors))
     return msg
Example no. 13
def router():

    _logger = get_logger(__name__)
    if request.form.get("token") == os.environ.get("SLACK_WEBHOOK_SECRET"):

        # Get info from incoming request
        channel_id = request.form.get("channel_id")
        user = request.form.get("user_name")
        message = request.form.get("text")
        _logger.info("Incoming message from {0} on {1}: {2}".format(channel_id, user, message))

        # Parse and route
        try:
            response = parse_message(message, user)
        except Exception as e:
            response = fail(e, user)
        slack_client = SlackClient(os.environ.get("SLACK_TOKEN"))
        slack_client.api_call(
            "chat.postMessage",
            channel=channel_id,
            username='******',
            icon_emoji=':sausage:',
            **response
        )

    return Response(), 200
Example no. 14
 def __init__(self, name, address, port, pubsub, key):
     self.name = name
     self.port = port
     self.address = address
     self.pubsub = pubsub
     self.key = key
     self.logger = util.get_logger("%s.%s" % (self.__module__, self.__class__.__name__))
Example no. 15
 def run(self):
     if __debug__:
         logging_info = {
             'mod': 'ReceivePeeredModifiedMsgAction',
             'endpoint_string': self.local_endpoint._endpoint_uuid_str
             }
         log_msg = 'Start receive peered mod msg ' + str(self.msg.event_uuid)
         util.get_logger().info(log_msg,extra=logging_info)
     
     event_uuid = self.msg.event_uuid
     event = self.local_endpoint._act_event_map.get_or_create_partner_event(event_uuid)
     event.generate_partner_modified_peered_response(self.msg)
     
     if __debug__:
         log_msg = 'End receive peered mod msg ' + event.str_uuid
         util.get_logger().info(log_msg,extra=logging_info)
Example no. 16
 def __init__(self, port):
   ReliableChatServerSocket.__init__(self, port)
   self.msg_acks = {} #hashcode -> [clients]
   self.sent_msgs = {} #who has been sent what?
   self.all_msgs = {} #hashcode -> msg
   self.identity = {} #socket_ptr -> name
   self.logger = get_logger(self)
Example no. 17
 def __init__(self):
     self.nodes = dict()
     self.clients_pubsub = PubSub(self, pub_port=settings.CLIENT_SUB, sub_port=settings.CLIENT_PUB, broadcast=False)
     self.nodes_pubsub = PubSub(self, pub_port=settings.NODE_SUB, sub_port=settings.NODE_PUB, parse_message=self.parse_message)
     self.logger = util.get_logger("%s.%s" % (self.__module__, self.__class__.__name__))
     Logger(self)
     self.run()
Example no. 18
 def __init__(self, user_name, server_loc):
   self.logger = get_logger(self)
   super(ReliableChatClient, self).__init__(*server_loc)
   self.user_name = user_name
   self.msg_stack = [] # (timestamp, msg), kept sorted
   self.acked_messages = {}
   self.queue_lock = threading.Lock() 
   self.connected = False
Example no. 19
 def __init__(self, name, *args, **kwargs):
     self.name = name
     self.initializing = True
     if LIVE: self.interface_kit = InterfaceKit()
     self.manager = PubSub(self, pub_port=settings.NODE_PUB, sub_port=settings.NODE_SUB, sub_filter=self.name)
     self.logger = util.get_logger("%s.%s" % (self.__module__, self.__class__.__name__))
     self.initialize()    
     self.run()
Example no. 20
 def __init__(self):
     self._logger = util.get_logger(__name__, log_file=self._log_file)
     self._init_devices()
     
     # Not sure if this is the right place for this screen saver object, but
     # I don't want to put it in busted() because that would instantiate it
     # every time busted() is triggered.
     self._screen_saver = Screen_Saver()
Example no. 21
 def __init__(self, input, output, min, max, state, current_state):
     self.input = input
     self.min = min
     self.max = max
     self.output = output
     self.state = state
     self.current_state = current_state
     self.logger = util.get_logger("%s.%s" % (self.__module__, self.__class__.__name__))
Example no. 22
 def __init__(self, index, display, interface, change=2, data_rate=64):
     self.index = index
     self.display = display
     self.id = util.slugify(display)
     self.change = change
     self.data_rate = data_rate
     self.interface = interface
     self.logger = util.get_logger("%s.%s" % (self.__module__, self.__class__.__name__))
Example no. 23
 def echo_error(self, loggers=['validation']):
     error_total = 0
     for meta, error_items in self._raw:
         print self.str_meta(meta)
         for row_data in error_items:
             error_count = 0
             for logger in loggers:
                 errors = get_logger(logger, row_data)
                 error_count += len(errors) if errors else 0
                 for col_data in itertools.chain(row_data['raw'], row_data['eval'], row_data['extend']):
                     errors = get_logger(logger, col_data)
                     error_count += len(errors) if errors else 0
             if error_count>0:
                 error_total += error_count
                 print self.str_item(row_data)
                 print self.str_item_error(row_data, loggers=loggers)
     print u'错误记录条数: {0}'.format(error_total)
Example no. 24
def _setup_logging():
    '''
    Internal function.  Not to be used by programmer.
    '''
    DEFAULT_LOG_FILENAME = 'log.txt'
    DEFAULT_LOGGING_LEVEL = logging.CRITICAL
    
    format_ = (
        '%(levelname)s : %(asctime)s.%(msecs)f: %(endpoint_string)s : %(mod)s \n     %(message)s')
        # '%(levelname)s : %(asctime)s.%(msecs).03d: %(endpoint_string)s : %(mod)s \n     %(message)s')
    logging.basicConfig(
        format=format_, filename=DEFAULT_LOG_FILENAME, level=DEFAULT_LOGGING_LEVEL,
        datefmt='%I:%M:%S')

    util.get_logger().critical(
        '***** New *****', extra={'mod': 'NEW', 'endpoint_string': 'NEW'})

    util.lock_log('***** New *****')
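Note how the format string above declares the custom %(endpoint_string)s and %(mod)s fields that the actions in the earlier examples supply through extra=logging_info. A self-contained sketch of the same pattern with the standard logging module (the names here are illustrative, not taken from the Waldo sources):

import logging

logging.basicConfig(
    format='%(levelname)s : %(asctime)s : %(endpoint_string)s : %(mod)s : %(message)s',
    level=logging.INFO)

# Every record must then supply the custom fields via extra=,
# mirroring the logging_info dicts built in the run()/service() methods above.
logging.getLogger(__name__).info(
    'Start receive ready action',
    extra={'mod': 'ExampleAction', 'endpoint_string': 'endpoint-uuid'})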
Example no. 25
 def ofchannel_log(self, ofmsgs):
     if self.dp is not None:
         if self.dp.ofchannel_log is not None:
             self.ofchannel_logger = util.get_logger(
                 self.dp.ofchannel_log,
                 self.dp.ofchannel_log,
                 logging.DEBUG,
                 0)
             for ofmsg in ofmsgs:
                 self.ofchannel_logger.debug(ofmsg)
Example no. 26
def main_test():
    import util
    test_urls = {
            'dd3322be6b143c6a842acdb4bb5e9f60': 'http://localhost/w/dl/20140728233100.ts',
            # '0d851220f47e7aed4615aebbd5cd2c7a': 'http://localhost/w/dl/test.jpg'
    }
    log = util.get_logger()
    ttttt(1, test_urls, log)
    ttttt(3, test_urls, log)
    ttttt(4, test_urls, log)
Example no. 27
def ws_test(log=None):
    if log is None:
        import util
        log = util.get_logger()
    ws = WorkShop(tmin=5, tmax=20, log=log)
    i = 0
    total = 0
    tasks = []
    try:
        ws.serve()
        while True:
            task = TaskTest(randint(0, 10), name='T_%05d' % i, log=log)
            task.makeSubWorks()
            assert task.subWorks is not None
            # wk.cancel()
            ws.addTask(task)
            tasks.append(task)
            i += 1
            total += 1
            log.error(' ||||||||||||||| tasks = %d', ws.currTaskSize)
            if i < 190:
                _sleep(0.3)
            else:
                _sleep(0.6)
            if i > 200:
                break
    except Exception as e:
        log.exception(e)
        raise
    finally:
        # _sleep(1)
        ws.setToStop()
        ws.join()
        canceled_total = unknow_total = archived_total = err_total = 0
        for task in tasks:
            log.error('[%s] status=%d', task.name, task.status)
            if task.isArchived():
                archived_total += 1
            elif task.isError():
                err_total += 1
            elif task.status == 3:
                canceled_total += 1
            else:
                unknow_total += 1
            # if task.isArchived() == task.isError():
            #     _sleep(0.3)
            #     for wk in task.subworks:
            #         print wk.status
        log.error('TASK: total=%d, exec=%d, arc=%d, canc=%d, err=%d, un=%d, clean=%d',
                  total, TaskTest.EXEC_TOTAL, archived_total, canceled_total,
                  err_total, unknow_total, TaskTest.CLEANUP)
        log.error('WORK: total=%d, exec=%d', WorkTest.TOTAL, WorkTest.EXEC_TOTAL)
        assert unknow_total == 0
        assert TaskTest.CLEANUP == total
        assert archived_total + err_total + canceled_total == TaskTest.TOTAL
Example no. 28
 def __init__(self, input, output, state, set_point, update=60, check=30, P=2.0, I=0.0, D=1.0, Derivator=0, Integrator=0, Integrator_max=500, Integrator_min=-500):
     self.input = input
     self.output = output
     self.state = state
     self.set_point = set_point
     self.pid = pid.PID(3.0,0.4,1.2)
     self.pid.setPoint(set_point)
     self.logger = util.get_logger("%s.%s" % (self.__module__, self.__class__.__name__))
     self.update = update
     self.check = check
     gevent.spawn(self.run)
Example no. 29
    def run(self):
        event_uuid = self.partner_request_block_msg.event_uuid
        
        if __debug__:
            logging_info = {
                'mod': 'ReceivePartnerMesssageRequestSequenceBlockAction',
                'endpoint_string': self.local_endpoint._endpoint_uuid_str
                }
            log_msg = 'Start sequence action for ' + str(event_uuid)
            util.get_logger().info(log_msg,extra=logging_info)

        
        evt = self.local_endpoint._act_event_map.get_or_create_partner_event(
            event_uuid)
        
        evt.recv_partner_sequence_call_msg(self.partner_request_block_msg)
        
        if __debug__:
            log_msg = 'End sequence action for ' + evt.str_uuid
            util.get_logger().info(log_msg,extra=logging_info)
Example no. 30
 def ofchannel_log(self, ofmsgs):
     """Log OpenFlow messages in text format to debugging log."""
     if self.dp is not None:
         if self.dp.ofchannel_log is not None:
             self.ofchannel_logger = util.get_logger(
                 self.dp.ofchannel_log,
                 self.dp.ofchannel_log,
                 logging.DEBUG,
                 0)
             for ofmsg in ofmsgs:
                 self.ofchannel_logger.debug(ofmsg)
Example no. 31
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as sched
import torch.utils.data as data
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor

if __name__ == '__main__':

    args = get_test_args()

    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, subdir='test')
    log = util.get_logger(args.save_dir, args.name)
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')

    # Load the checkpoint if given as parameter
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model = util.load_model(args.load_path)

    else:
        # Get model
        log.info('Building model...')
        model = util.get_model_class(args.model)(args)
Example no. 32
if __name__ == '__main__':
    # Whether to run in preview mode; if disabled, files are deleted directly
    preview = False
    # Directory of the POJO java files to check; only POJOs are supported, don't casually try this on other classes...
    dto_path = '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-model/src/main'
    # Directories whose java and xml files will be searched for references to the POJOs
    target_paths = [
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-model/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-dao/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/promotion-business/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/basics-promotion-service/src/main',
        '/Users/wangxiaolei/Documents/ody/code/baseline/web/promotion/2.9.6/back-promotion-web/src/main'
    ]

    logger = util.get_logger('find_class_used')
    logger.debug('开始运行')
    class_list = []
    files = util.list_file(dto_path, '.java')
    for i in files:
        class_list.append(ClassFile(i))
    logger.debug('找到 %s 个文件需要查找引用' % class_list.__len__())

    files = util.list_file(paths=target_paths, patterns=['.java', '.xml'])
    for i in files:
        logger.info('开始在文件 %s 中查找' % i)
        with open(i, 'r') as f:
            content = f.read()
            for j in class_list:
                logger.debug('开始查找 class: %s' % j.class_name)
                # First, the simplest check: see whether the fully-qualified class name is imported
Example no. 33
import aiohttp
import asyncio
import redis
import json
import util

logger = util.get_logger("api")

CACHE_MCAP_RESULT_KEY = 'beatrice_cmccache'

BINANCE_URL = 'https://www.binance.com/api/v3/ticker/price?symbol=NANOBTC'
KUCOIN_URL = 'https://api.kucoin.com/v1/open/tick?symbol=NANO-BTC'
NANEX_URL = 'https://nanex.co/api/public/ticker/btcnano'
CMC_URL = 'https://api.coinmarketcap.com/v2/ticker/1567/'
CMC_BTC_URL = 'https://api.coinmarketcap.com/v2/ticker/1/'
BANANO_URL = 'https://api.creeper.banano.cc/ticker'


async def json_get(reqUrl):
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(reqUrl, timeout=10) as resp:
                jsonResp = await resp.json()
                return jsonResp
    except BaseException:
        return None


async def get_banano_price():
    response = await json_get(BANANO_URL)
    if response is not None:
Example no. 34
# -*- encoding: utf-8 -*-
# Created on 2016-07-26 11:04:34
import datetime
from abc import ABCMeta, abstractmethod
from multiprocessing import Process

from peewee import SQL

import model
import util
from kuaidi100 import Kuaidi100ComponentImpl
from mojo_qq import MoJoQQComponentImpl
from mojo_wx import MoJoWXComponentImpl
from package_tracking_repo import PackageTrackingRepoComponentImpl

logger = util.get_logger("PackageTracking")


class PackageTrackingComponent:
    def __init__(self):
        pass

    __metaclass__ = ABCMeta

    @abstractmethod
    def sub_pkg_trk_msg(self, suber_account, suber_nike_name, group_name,
                        group_no, sub_type, sub_source, tracking_no):
        pass

    @abstractmethod
    def qry_pkg_trk_msg(self,
Example no. 35
def main(args):
    # setup logging
    log = get_logger(args.log)
    log(args)

    jsb_file_loc = "./data/jsb_processed.pkl"
    # ingest training/validation/test data from disk
    data = pickle.load(open(jsb_file_loc, "rb"))
    training_seq_lengths = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']
    test_seq_lengths = data['test']['sequence_lengths']
    test_data_sequences = data['test']['sequences']
    val_seq_lengths = data['valid']['sequence_lengths']
    val_data_sequences = data['valid']['sequences']
    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(np.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    log("N_train_data: %d     avg. training seq. length: %.2f    N_mini_batches: %d"
        % (N_train_data, np.mean(training_seq_lengths), N_mini_batches))

    # how often we do validation/test evaluation during training
    val_test_frequency = 50
    # the number of samples we use to do the evaluation
    n_eval_samples = 1

    # package repeated copies of val/test data for faster evaluation
    # (i.e. set us up for vectorization)
    def rep(x):
        y = np.repeat(x, n_eval_samples, axis=0)
        return y

    # get the validation/test data ready for the dmm: pack into sequences, etc.
    val_seq_lengths = rep(val_seq_lengths)
    test_seq_lengths = rep(test_seq_lengths)
    val_batch, val_batch_reversed, val_batch_mask, val_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * val_data_sequences.shape[0]),
        rep(val_data_sequences),
        val_seq_lengths,
        cuda=args.cuda)
    test_batch, test_batch_reversed, test_batch_mask, test_seq_lengths = poly.get_mini_batch(
        np.arange(n_eval_samples * test_data_sequences.shape[0]),
        rep(test_data_sequences),
        test_seq_lengths,
        cuda=args.cuda)

    # instantiate the dmm
    dmm = DMM(rnn_dropout_rate=args.rnn_dropout_rate,
              num_iafs=args.num_iafs,
              iaf_dim=args.iaf_dim,
              use_cuda=args.cuda)

    # setup optimizer
    adam_params = {
        "lr": args.learning_rate,
        "betas": (args.beta1, args.beta2),
        "clip_norm": args.clip_norm,
        "lrd": args.lr_decay,
        "weight_decay": args.weight_decay
    }
    adam = ClippedAdam(adam_params)

    # setup inference algorithm
    elbo = SVI(dmm.model, dmm.guide, adam, Trace_ELBO())

    # now we're going to define some functions we need to form the main training loop

    # saves the model and optimizer states to disk
    def save_checkpoint():
        log("saving model to %s..." % args.save_model)
        torch.save(dmm.state_dict(), args.save_model)
        log("saving optimizer states to %s..." % args.save_opt)
        adam.save(args.save_opt)
        log("done saving model and optimizer checkpoints to disk.")

    # loads the model and optimizer states from disk
    def load_checkpoint():
        assert exists(args.load_opt) and exists(args.load_model), \
            "--load-model and/or --load-opt misspecified"
        log("loading model from %s..." % args.load_model)
        dmm.load_state_dict(torch.load(args.load_model))
        log("loading optimizer states from %s..." % args.load_opt)
        adam.load(args.load_opt)
        log("done loading model and optimizer states.")

    # prepare a mini-batch and take a gradient step to minimize -elbo
    def process_minibatch(epoch, which_mini_batch, shuffled_indices):
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            # by default the KL annealing factor is unity
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size,
                                 N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)
        # do an actual gradient step
        loss = elbo.step(mini_batch, mini_batch_reversed, mini_batch_mask,
                         mini_batch_seq_lengths, annealing_factor)
        # keep track of the training loss
        return loss

    # helper function for doing evaluation
    def do_evaluation():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        dmm.rnn.eval()

        # compute the validation and test loss n_samples many times
        val_nll = elbo.evaluate_loss(val_batch, val_batch_reversed,
                                     val_batch_mask,
                                     val_seq_lengths) / np.sum(val_seq_lengths)
        test_nll = elbo.evaluate_loss(
            test_batch, test_batch_reversed, test_batch_mask,
            test_seq_lengths) / np.sum(test_seq_lengths)

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        dmm.rnn.train()
        return val_nll, test_nll

    # if checkpoint files provided, load model and optimizer states from disk before we start training
    if args.load_opt != '' and args.load_model != '':
        load_checkpoint()

    #################
    # TRAINING LOOP #
    #################
    times = [time.time()]
    for epoch in range(args.num_epochs):
        # if specified, save model and optimizer states to disk every checkpoint_freq epochs
        if args.checkpoint_freq > 0 and epoch > 0 and epoch % args.checkpoint_freq == 0:
            save_checkpoint()

        # accumulator for our estimate of the negative log likelihood (or rather -elbo) for this epoch
        epoch_nll = 0.0
        # prepare mini-batch subsampling indices for this epoch
        shuffled_indices = np.arange(N_train_data)
        np.random.shuffle(shuffled_indices)

        # process each mini-batch; this is where we take gradient steps
        for which_mini_batch in range(N_mini_batches):
            epoch_nll += process_minibatch(epoch, which_mini_batch,
                                           shuffled_indices)

        # report training diagnostics
        times.append(time.time())
        epoch_time = times[-1] - times[-2]
        log("[training epoch %04d]  %.4f \t\t\t\t(dt = %.3f sec)" %
            (epoch, epoch_nll / N_train_time_slices, epoch_time))

        # do evaluation on test and validation data and report results
        if val_test_frequency > 0 and epoch > 0 and epoch % val_test_frequency == 0:
            val_nll, test_nll = do_evaluation()
            log("[val/test epoch %04d]  %.4f  %.4f" %
                (epoch, val_nll, test_nll))
Example no. 36
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    word_vectors_char = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  word_vectors_char=word_vectors_char,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
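The two-argument util.get_logger(save_dir, name) used by these training scripts is also outside this listing. A plausible sketch (an assumption, not the project's actual helper) that writes to a file under save_dir and echoes to the console:

import logging
import os

def get_logger(log_dir, name):
    # Hypothetical logger writing to <log_dir>/log.txt and to stdout.
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter('[%(asctime)s] %(message)s',
                                  datefmt='%m.%d.%y %H:%M:%S')
    for handler in (logging.FileHandler(os.path.join(log_dir, 'log.txt')),
                    logging.StreamHandler()):
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger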
Example no. 37
#!/usr/bin/env python
# Copyright 2016-2017, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Report for how many of our fallback directories are unreachable.
"""

import time
import traceback

import stem.descriptor.remote
import stem.directory

import util

log = util.get_logger('fallback_directories')

NOTIFICATION_THRESHOLD = 50  # send notice if this percentage of fallbacks are unusable
TO_ADDRESSES = [
    '*****@*****.**', '*****@*****.**',
    '*****@*****.**'
]
EMAIL_SUBJECT = 'Fallback Directory Summary (%i/%i, %i%%)'

EMAIL_BODY = """\
%i/%i (%i%%) fallback directories have become slow or unresponsive...

"""

downloader = stem.descriptor.remote.DescriptorDownloader(timeout=30)
Example no. 38
import zerorpc
from .config import CONNURL
from .message import Message
import threading
from util import get_logger
from .state import *
from .excutor import Excutor

logger = get_logger(__name__, 'e:/{}.log'.format(__name__))


class Commanager:
    def __init__(self, meg: Message, timeout=3):
        self.meg = meg
        self.client = zerorpc.Client()
        self.event = threading.Event()
        self.timeout = timeout
        self.state = WAITNG
        self.excutor = Excutor()

    def sendmeg(self):
        try:
            self.event.clear()
            self.client.connect(CONNURL)
            self.client.send(self.meg.reg())
            while not self.event.wait(self.timeout):
                self.client.send(self.meg.heart_beat())
                if self.state == WAITNG:
                    task_item = self.client.get_task(self.meg.uuid)
                    if task_item:
                        task_id, script, time_out = task_item
Example no. 39
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vec = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    if args.name == 'baseline':
        model = BiDAF(word_vectors=word_vectors,
                      hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
    elif args.name == 'charembeddings':
        model = BiDAFChar(word_vectors=word_vectors,
                          char_vec=char_vec,
                          word_len=16,
                          hidden_size=args.hidden_size,
                          drop_prob=args.drop_prob)
    elif args.name == 'charembeddings2':
        model = BiDAFChar2(word_vectors=word_vectors,
                           char_vec=char_vec,
                           word_len=16,
                           hidden_size=args.hidden_size,
                           drop_prob=args.drop_prob)
    elif args.name == 'qanet':
        model = QANet(word_vectors=word_vectors,
                      char_vec=char_vec,
                      word_len=16,
                      emb_size=args.hidden_size,
                      drop_prob=args.drop_prob,
                      enc_size=args.enc_size,
                      n_head=args.n_head,
                      LN_train=args.ln_train,
                      DP_residual=args.dp_res,
                      mask_pos=args.mask_pos,
                      two_pos=args.two_pos,
                      total_prob=args.total_drop,
                      final_prob=args.final_prob)
    elif args.name == 'qanet2':
        model = QANet2(word_vectors=word_vectors,
                       char_vec=char_vec,
                       word_len=16,
                       emb_size=args.hidden_size,
                       drop_prob=args.drop_prob,
                       enc_size=args.enc_size,
                       n_head=args.n_head,
                       LN_train=args.ln_train,
                       DP_residual=args.dp_res,
                       mask_pos=args.mask_pos,
                       two_pos=args.two_pos,
                       rel=args.rel_att,
                       total_prob=args.total_drop,
                       final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    elif args.name == 'qanet3':
        model = QANet3(word_vectors=word_vectors,
                       char_vec=char_vec,
                       word_len=16,
                       emb_size=args.hidden_size,
                       drop_prob=args.drop_prob,
                       enc_size=args.enc_size,
                       n_head=args.n_head,
                       LN_train=args.ln_train,
                       DP_residual=args.dp_res,
                       mask_pos=args.mask_pos,
                       two_pos=args.two_pos,
                       rel=args.rel_att,
                       total_prob=args.total_drop,
                       final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    elif args.name == 'qanet4':
        model = QANet4(word_vectors=word_vectors,
                       char_vec=char_vec,
                       word_len=16,
                       emb_size=args.hidden_size,
                       drop_prob=args.drop_prob,
                       enc_size=args.enc_size,
                       n_head=args.n_head,
                       LN_train=args.ln_train,
                       DP_residual=args.dp_res,
                       mask_pos=args.mask_pos,
                       two_pos=args.two_pos,
                       rel=args.rel_att,
                       total_prob=args.total_drop,
                       final_prob=args.final_prob,
                       freeze=args.freeze_emb)
    else:
        raise ValueError('Wrong model name')

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler

    if args.name == 'qanet':
        optimizer = optim.Adam(model.parameters(),
                               args.lr,
                               betas=(0.8, 0.999),
                               weight_decay=3 * 1e-7,
                               eps=1e-7)
        scheduler = warmup(optimizer, 1, 2000)
    elif args.opt == 'adam':
        if args.grad_cent:
            optimizer = AdamWGC(model.parameters(),
                                args.lr,
                                betas=(0.9, 0.999),
                                weight_decay=3 * 1e-7,
                                eps=1e-7,
                                use_gc=True)
        else:
            optimizer = AdamW(model.parameters(),
                              args.lr,
                              betas=(0.8, 0.999),
                              weight_decay=3 * 1e-7,
                              eps=1e-7)
        scheduler = warmup(optimizer, 1, 2000)
    elif args.opt == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   args.lr,
                                   weight_decay=3 * 1e-7)
        scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    elif args.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              args.lr,
                              weight_decay=3 * 1e-7)
        scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    i = 0
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()
                i += 1
                loss /= args.acc_step

                # Backward
                loss.backward()
                if i % args.acc_step == 0:
                    nn.utils.clip_grad_norm_(model.parameters(),
                                             args.max_grad_norm)
                    optimizer.step()
                    scheduler.step(i // (args.acc_step))
                    ema(model, i // (args.acc_step))
                    optimizer.zero_grad()

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0 and i % args.acc_step == 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
Example no. 40
            bars_cdf,
            color='black',
            width=bar_width,
            edgecolor='white',
            label='C-DF')
    plt.ylabel('Amount of resources', fontsize=16)
    plt.xticks([r + bar_width for r in range(len(bars_ca))],
               ['(1) Relations', '(2) Types'],
               fontsize=16)
    plt.yticks(fontsize=14)
    plt.legend(fontsize=15)
    ax = plt.gca()
    ax.yaxis.grid()

    plt.savefig(
        util.get_results_file('results.graphs.dbpedia_unknown_resources'),
        bbox_inches='tight')


# --- START SCRIPT ---

if __name__ == '__main__':
    now = datetime.datetime.now()
    util.get_logger().info('Started graph generation.')

    generate_graphs()

    duration = (datetime.datetime.now() - now).seconds // 60
    util.get_logger().info(
        f'Finished graph generation after {duration} minutes.')
Example no. 41
import gzip
import glob
import os
import multiprocessing
import json
import numpy as np
from tqdm import tqdm
from pytorch_pretrained_bert.tokenization import BertTokenizer
import pickle
from functools import partial
from util import get_logger
import argparse

logger = get_logger('.', 'setup')


def get_setup_args():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--bert_model",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--is_eval",
                        default=False,
                        type=bool,
                        help="whether to downsample the candidates")
Example no. 42
#!/usr/bin/env python
# -*- coding: utf-8 -*-'

import sqlite3
from sqlite3 import Error

from util import get_logger
from datetime import datetime

logger = get_logger("cdn")


class DBController:
    db_path = "single_domain.db"

    def __init__(self):
        super(DBController, self).__init__()
        self.setup_db()

    def setup_db(self):
        self.create_table()

    def _execute_query(self, sql, values=None):
        try:
            conn = self.get_connection()
            cur = conn.cursor()
            if not values:
                cur.execute(sql)
            else:
                cur.execute(sql, values)
            conn.commit()
Example no. 43
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(),
                               args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SegmentSQuAD(args.train_record_file, args.use_squad_v2)
    #train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()
                y1, y2 = y1.to(device), y2.to(device)

                # Forward
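                # Each context is pre-split into strided windows along dim 1 of
                # cw_idxs (see train_dataset.stride). Every non-padding window is
                # scored by the sum of its best start/end log-probabilities, and
                # the span loss below is computed only on the best-scoring window.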
                loss = 0
                for i in range(batch_size):
                    # Start at -inf: log-probabilities are non-positive, so an
                    # initial threshold of 0 would never update and window 0
                    # would always be selected.
                    max_p_sum = float('-inf')
                    max_p_sum_idx = 0
                    for j in range(cw_idxs.size(1)):
                        # Deal with the case when all the words in the window are padded words
                        if cw_idxs[i, j].sum().item() == 0:
                            continue
                        log_p1_j, log_p2_j = model(cw_idxs[i, j].unsqueeze(0),
                                                   qw_idxs[i].unsqueeze(0))
                        max_log_p1_j = torch.max(log_p1_j.detach())
                        max_log_p2_j = torch.max(log_p2_j.detach())
                        max_p_sum_idx = j if (max_log_p1_j + max_log_p2_j
                                              ) > max_p_sum else max_p_sum_idx
                        max_p_sum = max_log_p1_j + max_log_p2_j if (
                            max_log_p1_j +
                            max_log_p2_j) > max_p_sum else max_p_sum
                    log_p1_max, log_p2_max = model(
                        cw_idxs[i, max_p_sum_idx].unsqueeze(0),
                        qw_idxs[i].unsqueeze(0))

                    # Adjust label to the window case
                    if max_p_sum_idx * train_dataset.stride + torch.argmax(
                            log_p1_max).item() == y1[i].item():
                        loss += F.nll_loss(
                            log_p1_max,
                            torch.argmax(log_p1_max).unsqueeze(0))
                    else:
                        loss += F.nll_loss(
                            log_p1_max,
                            torch.argmin(log_p1_max).unsqueeze(0))

                    if max_p_sum_idx * train_dataset.stride + torch.argmax(
                            log_p2_max).item() == y2[i].item():
                        loss += F.nll_loss(
                            log_p2_max,
                            torch.argmax(log_p2_max).unsqueeze(0))
                    else:
                        loss += F.nll_loss(
                            log_p2_max,
                            torch.argmin(log_p2_max).unsqueeze(0))

                loss_val = loss.item()

                # # Forward
                # log_p1, log_p2 = model(cw_idxs, qw_idxs)
                # loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                # loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
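These training loops construct util.EMA(model, args.ema_decay), call ema(model, step // batch_size) after every optimizer step, and wrap evaluation in ema.assign(model) / ema.resume(model). util.EMA itself is not shown in these excerpts; the sketch below is only a guess at that assign/resume pattern, assuming it maintains an exponential moving average of the trainable parameters:

class EMASketch:
    """Hypothetical stand-in for util.EMA (an assumption, not the original helper)."""

    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {name: param.detach().clone()
                       for name, param in model.named_parameters()
                       if param.requires_grad}
        self.backup = {}

    def __call__(self, model, num_updates):
        # Smaller effective decay early in training, approaching self.decay later.
        decay = min(self.decay, (1.0 + num_updates) / (10.0 + num_updates))
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = (decay * self.shadow[name]
                                     + (1.0 - decay) * param.detach())

    def assign(self, model):
        # Swap the averaged weights in for evaluation, saving the live ones.
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.backup[name] = param.data.clone()
                param.data.copy_(self.shadow[name])

    def resume(self, model):
        # Restore the live training weights saved by assign().
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data.copy_(self.backup[name])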
Esempio n. 44
0
def main():
    # define parser and arguments
    args = get_train_test_args()

    util.set_seed(args.seed)
    model = DistilBertForQuestionAnswering.from_pretrained(
        "distilbert-base-uncased")
    tokenizer = DistilBertTokenizerFast.from_pretrained(
        'distilbert-base-uncased')
    '''###'''
    # if args.reinit_pooler:
    #     encoder_temp = getattr(model, "distilbert")  # Equivalent to model.distilbert
    #     encoder_temp.pooler.dense.weight.data.normal_(mean=0.0, std=encoder_temp.config.initializer_range)
    #     encoder_temp.pooler.dense.bias.data.zero_()  # The change of encoder_temp would affect the model
    #     for p in encoder_temp.pooler.parameters():
    #         p.requires_grad = True

    if args.reinit_layers > 0:
        import torch.nn as nn
        from transformers.models.distilbert.modeling_distilbert import MultiHeadSelfAttention, FFN
        # model_distilbert = getattr(model, "distilbert")  # model.distilbert; change of model_distilbert affects model!
        # Reinitialization for the last few layers
        for layer in model.distilbert.transformer.layer[-args.reinit_layers:]:
            for module in layer.modules():
                # print(module)
                model.distilbert._init_weights(
                    module)  # It's the line equivalent to below approach
                # if isinstance(module, nn.modules.linear.Linear):  # Original form for nn.Linear
                #     # model.config.initializer_range == model.distilbert.config.initializer_range => True
                #     module.weight.data.normal_(mean=0.0, std=model.distilbert.config.initializer_range)
                #     if module.bias is not None:
                #         module.bias.data.zero_()
                # elif isinstance(module, nn.modules.normalization.LayerNorm):
                #     module.weight.data.fill_(1.0)
                #     module.bias.data.zero_()
                # elif isinstance(module, FFN):
                #     for param in [module.lin1, module.lin2]:
                #         param.weight.data.normal_(mean=0.0, std=model.distilbert.config.initializer_range)
                #         if param.bias is not None:
                #             param.bias.data.zero_()
                # elif isinstance(module, MultiHeadSelfAttention):
                #     for param in [module.q_lin, module.k_lin, module.v_lin, module.out_lin]:
                #         param.data.weight.normal_(mean=0.0, std=model.distilbert.config.initializer_range)
                #         if param.bias is not None:
                #             param.bias.data.zero_()

    if args.do_train:
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        args.save_dir = util.get_save_dir(args.save_dir, args.run_name)
        log = util.get_logger(args.save_dir, 'log_train')
        log.info(f'Args: {json.dumps(vars(args), indent=4, sort_keys=True)}')
        log.info("Preparing Training Data...")
        args.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        trainer = Trainer(args, log)
        train_dataset, _ = get_dataset(args, args.train_datasets,
                                       args.train_dir, tokenizer, 'train')
        log.info("Preparing Validation Data...")
        val_dataset, val_dict = get_dataset(args, args.train_datasets,
                                            args.val_dir, tokenizer, 'val')
        train_loader = DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            sampler=RandomSampler(
                train_dataset))  # For squad: 50537/16~3159 items/batches
        val_loader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                sampler=SequentialSampler(val_dataset))
        best_scores = trainer.train(model, train_loader, val_loader, val_dict)
    if args.do_eval:
        args.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        split_name = 'test' if 'test' in args.eval_dir else 'validation'
        log = util.get_logger(args.save_dir, f'log_{split_name}')
        trainer = Trainer(args, log)
        checkpoint_path = os.path.join(args.save_dir, 'checkpoint')
        model = DistilBertForQuestionAnswering.from_pretrained(
            checkpoint_path)  # Trained model
        model.to(args.device)
        eval_dataset, eval_dict = get_dataset(args, args.eval_datasets,
                                              args.eval_dir, tokenizer,
                                              split_name)
        eval_loader = DataLoader(eval_dataset,
                                 batch_size=args.batch_size,
                                 sampler=SequentialSampler(eval_dataset))
        eval_preds, eval_scores = trainer.evaluate(model,
                                                   eval_loader,
                                                   eval_dict,
                                                   return_preds=True,
                                                   split=split_name)
        results_str = ', '.join(f'{k}: {v:05.2f}'
                                for k, v in eval_scores.items())
        log.info(f'Eval {results_str}')
        # Write submission file
        sub_path = os.path.join(args.save_dir,
                                split_name + '_' + args.sub_file)
        log.info(f'Writing submission file to {sub_path}...')
        with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
            csv_writer = csv.writer(csv_fh, delimiter=',')
            csv_writer.writerow(['Id', 'Predicted'])
            for uuid in sorted(eval_preds):
                csv_writer.writerow([uuid, eval_preds[uuid]])
Esempio n. 45
0
import os
from enum import Enum, auto
from typing import Dict, List

from flask import (Blueprint, flash, redirect, render_template, request,
                   session, url_for)
from lxml import etree

from util import cxml, get_logger, xslt

from .settings import settings

logger = get_logger(__name__)


class VarResolvedSource(Enum):
    NONE = auto()
    SETTINGS = auto()
    SESSION = auto()
    FORM = auto()


class TemplateVarSpec:
    def __init__(
            self,
            name: str,
            sync_session=True,
            from_settings=True,
            subst_xpath='',
            help=''
Esempio n. 46
0
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    if args.model_name == 'sketchy':
        model = SketchyReader(word_vectors=word_vectors,
                              char_vectors=char_vectors,
                              hidden_size=args.hidden_size,
                              char_embed_drop_prob=args.char_embed_drop_prob,
                              num_heads=args.num_heads,
                              drop_prob=args.drop_prob)  # SKETCHY
    elif args.model_name == 'intensive':

        model = IntensiveReader(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                num_heads=args.num_heads,
                                char_embed_drop_prob=args.char_embed_drop_prob,
                                hidden_size=args.hidden_size,
                                drop_prob=args.drop_prob)  # INTENSIVE
    elif args.model_name == 'retro':

        model = RetroQANet(word_vectors=word_vectors,
                           char_vectors=char_vectors,
                           hidden_size=args.hidden_size,
                           num_heads=args.num_heads,
                           char_embed_drop_prob=args.char_embed_drop_prob,
                           intensive_path=args.load_path_i,
                           sketchy_path=args.load_path_s,
                           gpu_ids=args.gpu_ids,
                           drop_prob=args.drop_prob)  # Outer

    model = nn.DataParallel(model, args.gpu_ids)

    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # setup losses
    bceLoss = nn.BCELoss()

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    if args.optim == "adam":
        optimizer = optim.Adam(
            model.parameters(), 0.001, betas=(0.8, 0.999), eps=1e-7, weight_decay=3e-7)

    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)

    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        counter = 0
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                counter += 1
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                y1, y2 = y1.to(device), y2.to(device)
                if args.model_name == 'sketchy':
                    yi = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    # Answerability target (1 if the question is answerable); cast to
                    # float without moving it off the device the model output lives on.
                    loss = bceLoss(yi, torch.where(
                        y1 == 0, 0, 1).float())
                elif args.model_name == 'intensive':
                    yi, log_p1, log_p2 = model(
                        cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    # if counter % 100 == 0:
                    #print(torch.max(log_p1.exp(), dim=1)[0])
                    # $print(torch.max(log_p2.exp(), dim=1)[0])
                    #weights = torch.ones(log_p1.shape[1])
                    #weights[0] = 2/(log_p1.shape[1])
                    #nll_loss = nn.NLLLoss(weight=weights.to(device='cuda:0'))
                    # gt_0 = torch.zeros(yi.shape[0]).to(device)
                    # gt_1 = torch.ones(yi.shape[0]).to(device)
                    # Weighted sum of the answerability (BCE) loss and the span (NLL) loss;
                    # the BCE target is cast to float on the same device as yi.
                    loss = args.alpha_1 * bceLoss(
                        yi, torch.where(y1 == 0, 0, 1).float()
                    ) + args.alpha_2 * (F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2))
                    #loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                elif args.model_name == 'retro':
                    log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                else:
                    raise ValueError(
                        'invalid --model_name, sketchy or intensive required')

                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(
                    model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         NLL=loss_val)
                tbx.add_scalar('train/' + args.model_name, loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2,
                                                  model_name=args.model_name,
                                                  a1=args.alpha_1,
                                                  a2=args.alpha_2)
                    saver.save(
                        step, model, results[args.metric_name], device, model_name=args.model_name)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(
                        f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
Esempio n. 47
0
import sys

import numpy as np
import pandas as pd

import util  # user defined
import config

from datetime import datetime, date
from dateutil.relativedelta import relativedelta

try:
    import xlrd

except ImportError:
    print("Please install the following module: 'xlrd'")
    sys.exit(-1)

kpilog = util.get_logger(config.autokpi["logname"])


#-----------------------------------------------
# Create structure to hold plot months
# - returns DataFrame structure
#-----------------------------------------------
def get_plot_months(start_dt, end_dt):

    months = util.get_kpi_months(start_dt, end_dt)

    # get corresponding fyq for each month
    fyq = util.get_month_fyq(months)

    #df = months_df.to_frame()
    months_df = pd.DataFrame(months, columns=["Months"])
Esempio n. 48
0
from datetime import datetime

import pytz
import boto
from boto.s3.bucket import Bucket
import re
import os
import arrow
from util import get_logger

from snakebite.client import Client

log = get_logger('inviso-monitor')

job_pattern = re.compile('job_[0-9]+_[0-9]+', re.IGNORECASE)
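# Matches Hadoop job IDs of the form job_<cluster timestamp>_<sequence>, e.g. job_1417623819481_0001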
EPOCH = datetime(1970, 1, 1, tzinfo=pytz.UTC)


class Cluster:
    def __init__(self, id, name, host, port, namenode, namenode_port,
                 history_server):
        self.id = id
        self.name = name
        self.host = host
        self.port = port
        self.namenode = namenode
        self.namenode_port = namenode_port
        self.history_server = history_server


class Monitor(object):
Esempio n. 49
0
# load the example image and convert it to grayscale
import numpy as np  # used by np.array(image) in find_text
import pytesseract
import util
from bot_config import BotConfig
from common import WINDOW_WIDTH, WINDOW_HEIGHT
from imagesearch import region_grabber, imagesearcharea, imagesearcharea_v2, region_grabber_v2
from util import click_image


class ImageNotFoundException(Exception):
    pass


IMAGE_FOLDER = BotConfig().get_property("General", "image_folder")

logger = util.get_logger()
# TODO test
import emu_manager

hwnd = emu_manager.get_instance(
    int(BotConfig().get_property("Emulator", "use_device")))


def find_text(text, x1, y1, x2, y2):
    import emu_manager
    hwnd = emu_manager.get_instance(
        int(BotConfig().get_property("Emulator", "use_device")))
    image = region_grabber_v2((x1, y1, x2, y2), hwnd)
    # image.save('testarea.png')  # useful for debugging purposes, this will save the captured region as "testarea.png"

    image = np.array(image)
Esempio n. 50
0
import stem.descriptor
import stem.descriptor.remote
import stem.directory

import util  # provides get_logger and log_stem_debugging used below

EMAIL_SUBJECT = 'Unable to retrieve tor descriptors'

EMAIL_BODY = """\
Unable to retrieve the present %s...

source: %s
time: %s
error: %s
"""

log = util.get_logger('descriptor_checker')
util.log_stem_debugging('descriptor_checker')


def main():
    # retrieve the server and extrainfo descriptors from any authority

    targets = [
        ('server descriptors', '/tor/server/all.z'),
        ('extrainfo descriptors', '/tor/extra/all.z'),
    ]

    for descriptor_type, resource in targets:
        log.debug("Downloading %s..." % descriptor_type)

        query = stem.descriptor.remote.Query(
Esempio n. 51
0
def main():
    args = get_bert_args()
    assert not (args.do_output
                and args.do_train), 'Don\'t output and train at the same time!'
    if args.do_output:
        sub_dir_prefix = 'output'
    elif args.do_train:
        sub_dir_prefix = 'train'
    else:
        sub_dir_prefix = 'test'
    args.save_dir = util.get_save_dir(args.save_dir, args.name, sub_dir_prefix)
    args.output_dir = args.save_dir

    global logger
    logger = util.get_logger(args.save_dir, args.name)

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        logger.warning(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )

    if not args.evaluate_during_saving and args.save_best_only:
        raise ValueError("No best result without evaluation during saving")

    # Use util.get_save_dir, comment this for now
    # if (
    #     os.path.exists(args.output_dir)
    #     and os.listdir(args.output_dir)
    #     and args.do_train
    #     and not args.overwrite_output_dir
    # ):
    #     raise ValueError(
    #         "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
    #             args.output_dir
    #         )
    #     )

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    # logging.basicConfig(
    #     format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    #     datefmt="%m/%d/%Y %H:%M:%S",
    #     level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    # )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    # model = model_class.from_pretrained(
    #     args.model_name_or_path,
    #     from_tf=bool(".ckpt" in args.model_name_or_path),
    #     config=config,
    #     cache_dir=args.cache_dir if args.cache_dir else None,
    # )
    #

    model = BertQA(config_class,
                   model_class,
                   model_type=args.model_name_or_path,
                   do_cls=True)

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Before we do anything with models, we want to ensure that we get fp16 execution of torch.einsum if args.fp16 is set.
    # Otherwise it'll default to "promote" mode, and we'll get fp32 operations. Note that running `--fp16_opt_level="O2"` will
    # remove the need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex

            apex.amp.register_half_function(torch, "einsum")
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                evaluate=False,
                                                output_examples=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Save the trained model and the tokenizer
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # Take care of distributed/parallel training
        model_to_save = model.module if hasattr(model, "module") else model
        # model_to_save.save_pretrained(output_dir)  # BertQA is not a PreTrainedModel class
        torch.save(model_to_save,
                   os.path.join(args.output_dir,
                                'pytorch_model.bin'))  # save entire model
        tokenizer.save_pretrained(args.output_dir)  # save tokenizer
        config.save_pretrained(args.output_dir)  # save config

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        # model = model_class.from_pretrained(args.output_dir)  # BertQA is not a PreTrainedModel class
        model = torch.load(os.path.join(args.output_dir, 'pytorch_model.bin'))
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        if args.do_train:
            logger.info(
                "Loading checkpoints saved during training for evaluation")
            checkpoints = [args.output_dir]
            if args.eval_all_checkpoints:
                checkpoints = list(
                    os.path.dirname(c) for c in sorted(
                        glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                                  recursive=True)))
                # logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
        else:
            logger.info("Loading checkpoint %s for evaluation",
                        args.model_name_or_path)
            checkpoints = [args.eval_dir]
        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            # model = model_class.from_pretrained(checkpoint)   # BertQA is not a PreTrainedModel class
            model = torch.load(os.path.join(checkpoint, 'pytorch_model.bin'))
            model.to(args.device)

            # Evaluate
            result = evaluate(args,
                              model,
                              tokenizer,
                              prefix=global_step,
                              save_dir=args.output_dir,
                              save_log_path=os.path.join(
                                  checkpoint, 'eval_result.json'))

            result = dict(
                (k + ("_{}".format(global_step) if global_step else ""), v)
                for k, v in result.items())
            results.update(result)

            logger.info(
                f'Convert format and Writing submission file to directory {args.output_dir}...'
            )

            util.convert_submission_format_and_save(
                args.output_dir,
                prediction_file_path=os.path.join(args.output_dir,
                                                  'predictions_.json'))

    logger.info("Results: {}".format(results))

    if args.do_output and args.local_rank in [-1, 0]:
        assert not args.do_train and not args.do_eval

        logger.info("Loading checkpoint %s for output",
                    args.model_name_or_path)
        checkpoints = [args.eval_dir]

        logger.info("Output the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            model = torch.load(os.path.join(checkpoint, 'pytorch_model.bin'))
            model.to(args.device)

            generate_model_outputs(args,
                                   model,
                                   tokenizer,
                                   is_dev=True,
                                   prefix=global_step,
                                   save_dir=args.output_dir)
    return results
Esempio n. 52
0
import pandas as pd
import numpy as np
from util import timeclass, timeit, get_logger
import torch

VERBOSITY_LEVEL = 'INFO'
LOGGER = get_logger(VERBOSITY_LEVEL, __file__)


@timeit
def sparse_dot(matrix_a, matrix_b):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    return torch.mm(
        torch.Tensor(matrix_a).to(device),
        torch.Tensor(matrix_b).to(device)).cpu().numpy()
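Despite its name, sparse_dot performs a dense matrix product: both operands are copied to the GPU when one is available and the result comes back as a NumPy array. A small usage sketch with illustrative inputs:

a = np.arange(6, dtype=np.float32).reshape(2, 3)
b = np.ones((3, 4), dtype=np.float32)
product = sparse_dot(a, b)  # (2, 4) ndarray, computed on the GPU if available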


class Feat:
    def __init__(self):
        pass

    @timeclass('Feat')
    def fit_transform(self, table, drop_sum_columns):
        degree_columns = self.degree(table)
        degree_bins_columns = self.degree_bins(table)

        neighbor_columns = self.get_neighbor(table)
        bin_2_neighbor_mean_degree_bins_columns = self.bin_2_neighbor_mean_degree_bins(
            table)

        gnn_append = [
Esempio n. 53
0
 def __init__(self, algor_params, other_params):
     super(KMeansEvaluate, self).__init__(algor_params, other_params)
     self.log = get_logger('KMeansEvaluate')
Esempio n. 54
0
import os
import sys
import argparse
import glob
from pathlib import Path

path_this = os.path.dirname(os.path.abspath(__file__))
sys.path.append(path_this)

from util import get_logger
from notebook import Notebook

logger = get_logger("convert")


def convert_file(arg):
    nb = Notebook(arg['input_file'])
    nb.export(arg['output_file'], img_to=arg['media_folder'])


def convert_folder(arg):
    # first, we need to somehow reconstruct the hierarchy of the input folder
    # to the output folder
    # the way we do that by get the input path, and then subtract it from the
    # actual notes path.
    tot_base_part = len(Path(arg['input_file']).parts)
    for note in glob.iglob(os.path.join(arg['input_file'], '**/*.ipynb'),
                           recursive=True):
        logger.debug("Processing {}".format(note))
        # $note is also a path, but joined with the $input_file
        # get note but with strip base, only preserve the hierarchy
Esempio n. 55
0
 def __init__(self, conf):
     self.conf = conf
     self.device = torch.device(f"cuda:{conf.gpu_id}")
     self.log = get_logger()
     torch.set_printoptions(precision=8)
     if conf.runid:
         conf.rundir = mkdir(conf.outdir / conf.runid)
     if not conf.rundir:
         conf.rundir = next_rundir(conf.outdir, log=self.log)
     self.rundir = conf.rundir
     dump_args(conf, conf.rundir / "conf.json")
     set_random_seed(conf.random_seed)
     if self.conf.use_bert:
         assert self.conf.lang in Bert.supported_langs, self.conf.lang
         self.bert = Bert(self.conf.bert_model_name, device=self.device)
     else:
         self.bert = None
     self.data = load_dataset(conf, conf.lang, bert=self.bert)
     _data = [self.data]
     for d in _data:
         self.log.info(
             f"{len(d.train_loader)} batches | bs {conf.batch_size}")
     self.model = self.get_model()
     self.optimizer = get_optim(conf, self.model)
     optimum = "min"
     if conf.lr_scheduler == "plateau":
         self.lr_scheduler = ReduceLROnPlateau(self.optimizer,
                                               factor=0.1,
                                               patience=2,
                                               mode=optimum,
                                               verbose=True)
     elif conf.lr_scheduler:
         raise ValueError("Unknown lr_scheduler: " + conf.lr_scheduler)
     self.losses = LossTrackers.from_names("loss", log=self.log)
     if (self.main_lang_data.tag == "ner"
             or self.conf.dataset.startswith("sr3de")):
         if self.data.is_multilingual:
             self.sentence_texts = {
                 split_name: self.main_lang_data.token_texts(split_name)
                 for split_name in ["dev", "test"]
             }
             self.conll_score = {
                 lang: ConllScore(tag_enc=self.main_lang_data.tag_enc)
                 for lang in self.data.dev
             }
             self.score = {
                 lang: Score("f1",
                             save_model=False,
                             log=self.log,
                             score_func=self.conll_score[lang],
                             add_mode="append")
                 for lang in self.data.dev
             }
             self.avg_score = Score("avg_f1",
                                    log=self.log,
                                    score_func="dummy",
                                    add_mode="append")
         else:
             self.sentence_texts = {
                 split_name: self.main_lang_data.token_texts(split_name)
                 [:conf.max_eval_inst]
                 for split_name in ["dev", "test"]
             }
             self.conll_score = ConllScore(
                 tag_enc=self.main_lang_data.tag_enc)
             self.score = Score("f1",
                                log=self.log,
                                score_func=self.conll_score,
                                add_mode="append")
     else:
         if self.data.is_multilingual:
             self.score = {
                 lang: Score("acc", log=self.log)
                 for lang in self.data.dev
             }
             self.avg_score = Score("avg_acc",
                                    log=self.log,
                                    score_func="dummy",
                                    add_mode="append")
         else:
             self.score = Score("acc", log=self.log)
     if conf.early_stop > 0:
         score_optimum = ("max" if
                          (self.conf.dataset.startswith("wikiannmulti")
                           or self.data.is_multilingual) else
                          self.score.optimum)
         self.early_stop = EarlyStopping(
             score_optimum,
             min_delta=conf.early_stop_min_delta,
             patience=conf.early_stop)
     else:
         self.early_stop = None
     self.epoch = 0
Esempio n. 56
0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
"""

__all__ = ("service_map", )

from util import get_logger, conf
from .device import Device
import rgbxy
import datetime
import requests

logger = get_logger(__name__.split(".", 1)[-1])

converter_pool = dict()


def get_gamut(model_id):
    # https://developers.meethue.com/develop/hue-api/supported-devices/
    if model_id in ("LCT001", "LCT007", "LCT002", "LCT003", "LLM001"):
        return rgbxy.GamutB
    elif model_id in ("LCT010", "LCT014", "LCT015", "LCT016", "LCT011",
                      "LLC020", "LST002", "LCT012", "LCT024"):
        return rgbxy.GamutC
    elif model_id in ("LLC010", "LLC006", "LST001", "LLC011", "LLC012",
                      "LLC005", "LLC007", "LLC014"):
        return rgbxy.GamutA
    else:
Esempio n. 57
0
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    # Args:  word_vectors: word vector tensor of dimension [vocab_size * wemb_dim]
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get Model
    log.info('Building Model...')
    model = QANet(word_vectors,
                  char_vectors,
                  args.para_limit,
                  args.ques_limit,
                  args.f_model,
                  num_head=args.num_head,
                  train_cemb=(not args.pretrained_char))
    model = nn.DataParallel(model, args.gpu_ids)

    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(
        params=parameters,
        lr=args.lr,
        betas=(args.beta1, args.beta2),
        eps=1e-8,
        weight_decay=3e-7)
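    # Logarithmic warm-up: the LR multiplier grows as log(step + 1) / log(lr_warm_up_num)
    # and stays at 1 once the step count reaches lr_warm_up_num.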
    cr = 1.0 / math.log(args.lr_warm_up_num)
    scheduler = optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda ee: cr * math.log(ee + 1)
        if ee < args.lr_warm_up_num else 1)
    loss_f = torch.nn.CrossEntropyLoss()

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)

                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = torch.mean(loss_f(log_p1, y1) + loss_f(log_p2, y2))
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch,
                                         NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR',
                               optimizer.param_groups[0]['lr'],
                               step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
Esempio n. 58
0
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = ModelClass(embeddings=word_vectors, hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)
    log.info(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD(record_file, args.use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.split} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}  # Predictions for submission
    eval_file = vars(args)[f'{args.split}_eval_file']
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (except for test set, since it does not come with labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)
        results_list = [('NLL', nll_meter.avg), ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Log to console
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
        log.info(f'{args.split.title()} {results_str}')

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info(f'Writing submission file to {sub_path}...')
    with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
Esempio n. 59
0
# Minimum number of words a message must contain (for rain eligibility)
LAST_MSG_RAIN_WORDS = 3

# (Seconds) How long user must wait between tiprandom
TIP_RANDOM_WAIT = 10
# (Seconds) How long user must wait between tipfavorites
TIP_FAVORITES_WAIT = 150

db = PooledPostgresqlExtDatabase(settings.database,
                                 user=settings.database_user,
                                 password=settings.database_password,
                                 host='localhost',
                                 port=5432,
                                 max_connections=16)

logger = util.get_logger("db")


### User Stuff
@db.connection_context()
def get_accounts():
    u = User.select(User.wallet_address)
    accts = []
    for a in u:
        accts.append(a.wallet_address)
    return accts


@db.connection_context()
def get_user_by_id(user_id, user_name=None):
    try:
Esempio n. 60
0
def main():
    # define parser and arguments
    args = get_train_test_args()
    util.set_seed(args.seed)
    # model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased")
    model = DomainQA(args.num_classes, args.hidden_size, args.num_layers,
                     args.dropout, args.dis_lambda, args.concat, args.anneal)
    tokenizer = DistilBertTokenizerFast.from_pretrained(
        'distilbert-base-uncased')

    if args.do_train:
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        args.save_dir = util.get_save_dir(args.save_dir, args.run_name)
        log = util.get_logger(args.save_dir, 'log_train')
        log.info(f'Args: {json.dumps(vars(args), indent=4, sort_keys=True)}')
        log.info("Preparing Training Data...")
        args.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        if args.load_weights != '':
            args.load_weights = os.path.join(args.load_weights, 'checkpoint',
                                             model.WEIGHTS_NAME)
            model.load_state_dict(torch.load(args.load_weights))
        if args.load_distilbert_weights != '':
            args.load_distilbert_weights = os.path.join(
                args.load_distilbert_weights, 'checkpoint', model.WEIGHTS_NAME)
            model.distilbert.load_state_dict(
                torch.load(args.load_distilbert_weights))
            print('loaded pretrained distilbert weights from',
                  args.load_distilbert_weights)

        #target_data_dir, target_dataset, tokenizer, split_name, source_data_dir = None, source_dataset = None
        train_dataset, _ = get_train_dataset(args, \
                                       args.target_train_dir,\
                                       args.target_train_datasets,\
                                       tokenizer, 'train', \
                                       source_data_dir=args.source_train_dir, \
                                       source_dataset=args.source_train_datasets)
        log.info("Preparing Validation Data...")
        val_dataset, val_dict = get_dataset(args, \
                                       args.eval_datasets,\
                                       args.eval_dir,\
                                       tokenizer, 'val')
        train_loader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  sampler=RandomSampler(train_dataset))
        val_loader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                sampler=SequentialSampler(val_dataset))
        # warm up
        if args.max_steps > 0:
            args.t_total = args.max_steps  # Total number of training updates
            args.num_epochs = args.max_steps // (
                len(train_loader) // args.gradient_accumulation_steps) + 1
        else:
            args.t_total = len(
                train_loader
            ) // args.gradient_accumulation_steps * args.num_epochs  # self.gradient_accumulation_steps = 1

        if args.warmup_ratio > 0:
            assert args.warmup_steps == 0
            args.warmup_steps = int(args.warmup_ratio * args.t_total)

        trainer = Trainer(args, log, model)

        best_scores = trainer.train(train_loader, val_loader, val_dict)

    if args.do_eval:
        args.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        split_name = 'test' if 'test' in args.eval_dir else 'validation'
        log = util.get_logger(args.save_dir, f'log_{split_name}')
        trainer = Trainer(args, log, model)
        config_path = os.path.join(args.save_dir, 'checkpoint', 'config.json')
        checkpoint_path = os.path.join(args.save_dir, 'checkpoint',
                                       model.WEIGHTS_NAME)
        model.load_state_dict(torch.load(checkpoint_path))
        model.to(args.device)
        eval_dataset, eval_dict = get_dataset(args, args.eval_datasets,
                                              args.eval_dir, tokenizer,
                                              split_name)
        eval_loader = DataLoader(eval_dataset,
                                 batch_size=args.batch_size,
                                 sampler=SequentialSampler(eval_dataset))
        eval_preds, eval_scores = trainer.evaluate(eval_loader,
                                                   eval_dict,
                                                   return_preds=True,
                                                   split=split_name)
        results_str = ', '.join(f'{k}: {v:05.2f}'
                                for k, v in eval_scores.items())
        log.info(f'Eval {results_str}')
        # Write submission file
        sub_path = os.path.join(args.save_dir,
                                split_name + '_' + args.sub_file)
        log.info(f'Writing submission file to {sub_path}...')
        with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
            csv_writer = csv.writer(csv_fh, delimiter=',')
            csv_writer.writerow(['Id', 'Predicted'])
            for uuid in sorted(eval_preds):
                csv_writer.writerow([uuid, eval_preds[uuid]])