Example #1
def load_translation(translation_path):
    """从文件夹中读取所有翻译文件

    Args:
        translation_path (str): 存放翻译 xlsx 文件的路径或文件

    Returns:
        category_to_translated (dict[str: list]): dict<str, list>, 根据 category 归类的翻译
    """
    file_list = []
    if os.path.isfile(translation_path):
        file_list.append((translation_path, translation_path))
    else:
        for dir_path, dir_names, file_names in os.walk(translation_path):
            for file_name in file_names:
                if file_name.lower().endswith('.xlsx') and not file_name.startswith('~'):
                    file_path = os.path.join(dir_path, file_name)
                    file_list.append((file_name, file_path))

    category_to_translated = {}
    for file_name, file_path in file_list:
        # load from one file
        log.info('load from %s' % file_name)
        category, translated_data = load_from_langxls(file_path, "zh", need_check=True)
        log.info('load %d %ss' % (len(translated_data), category))
        if category in category_to_translated:
            log.warning('warning: override category %s' % category)
        category_to_translated[category] = translated_data
    return category_to_translated
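
A minimal usage sketch for the loader above; the directory name is hypothetical and load_translation is assumed to be importable from this module:

category_to_translated = load_translation('translation/lang')
for category, rows in sorted(category_to_translated.items()):
    print('%s: %d translated entries' % (category, len(rows)))
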
Example #2
    def multi_trim(cls, files=[], callback_list=[]):
        '''Trim multiple files concurrently (multi-threaded batch trim).
        :param files: list of per-file trim configs:
            [
                {
                    'path':'/Users/nut/Downloads/RS/CCAV.mp4',
                    'trim_times':(
                        ("00:50:22", "01:03:27"),
                        ("01:19:39", "01:37:04"), ...
                    )
                }...
            ]
        :param callback_list: names of the callback functions to run after each trim.
            ['compress', ...]
        '''

        log.warning('Thread: %s, parent process: %s, <Task (%s) start...>' % (
            threading.current_thread().getName(), os.getpid(), sys._getframe().f_code.co_name))

        executor = BoundedExecutor(0, 4)

        for file in files:
            suffix_number = 0
            for time in file.get('trim_times'):
                suffix_number += 1
                log.info(sys._getframe().f_code.co_name,
                         'suffix_number', suffix_number)
                future = executor.submit(cls(file.get(
                    'path')).trim, time=time, suffix_number=suffix_number, lock=executor.lock)
                for callback in callback_list:
                    future.add_done_callback(getattr(cls, callback))
                log.info(sys._getframe().f_code.co_name,
                         'time, suffix_number', time, suffix_number)
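
A hypothetical call, with VideoTrimmer standing in for whatever class hosts multi_trim; the path and timestamps reuse the docstring's illustrative values:

VideoTrimmer.multi_trim(
    files=[{
        'path': '/Users/nut/Downloads/RS/CCAV.mp4',
        'trim_times': (
            ("00:50:22", "01:03:27"),
            ("01:19:39", "01:37:04"),
        ),
    }],
    callback_list=['compress'],
)
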
Example #3
 def get_head_token_idx(self,
                        start_token_idx,
                        end_token_idx,
                        msg_prefix=''):
     self.check_token_idx(start_token_idx)
     self.check_token_idx(end_token_idx - 1)
     assert start_token_idx < end_token_idx, \
         'start_token_idx:{} >= end_token_idx:{}'.format(
             start_token_idx, end_token_idx)
     head_idx_map = []
     for token_idx in range(start_token_idx, end_token_idx):
         head_trace = [token_idx]
         while start_token_idx <= head_trace[-1] < end_token_idx:
             _, head_idx = self.get_parent(head_trace[-1], msg_prefix)
             # warn if there is a loop in finding one token's head token
             if head_idx in head_trace:
                 log.warning(
                     '{}: In sentence #{}, token #{} has loop in its head '
                     'trace list.'.format(msg_prefix, self._sent_idx,
                                          token_idx))
                 break
             head_trace.append(head_idx)
         head_idx_map.append((token_idx, head_trace[-2]))
     head_idx_list = [head_idx for _, head_idx in head_idx_map]
     # warn if the tokens in the range don't have the same head token
     if min(head_idx_list) != max(head_idx_list):
         log.warning(
             '{}: In sentence #{}, tokens within the range [{}, {}] do not '
             'have the same head token'.format(msg_prefix, self._sent_idx,
                                               start_token_idx,
                                               end_token_idx))
     return min(head_idx_list)
Example #4
def get_gdrive_sheet(spreadsheet_name, sheet_name, retries=3):
    """
    Get a google drive spreadsheet

    Args:
        spreadsheet_name:   name of the document
        sheet_name:         name of the sheet inside the document
        retries:            number of attempts before giving up
    """

    init_gdrive()

    msg_error = "ConnectionError ({}) when trying to get '{}/{}'. Details: {}"

    # Open sheet in a way we can have some retries
    last_error = None
    for x in range(retries):
        try:
            # Get the spreadsheet
            spreadsheet = GDRIVE.open(spreadsheet_name)
            return spreadsheet.worksheet(sheet_name)

        except ConnectionError as e:
            # Keep a reference: `e` goes out of scope when the except block ends
            last_error = e
            log.warning(msg_error.format(x, spreadsheet_name, sheet_name, e))
            # Sleep to avoid query limitations
            sleep(x * 10)

            # Init gdrive again just in case
            init_gdrive(force=True)

    log.error(msg_error.format("last_attempt", spreadsheet_name, sheet_name, last_error))
    raise ValueError("Too many reading attempts in 'get_gdrive_sheet'")
Example #5
def init_env(config):
    # Set the debug environment variable CUBLAS_WORKSPACE_CONFIG to ":16:8"
    # (may limit overall performance) or ":4096:8" (increases the library
    # footprint in GPU memory by approximately 24 MiB).
    # https://docs.nvidia.com/cuda/cublas/index.html
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"
    if config.seed is not None:
        if config.seed >= 0:
            np.random.seed(config.seed)
            torch.manual_seed(config.seed)
            torch.set_deterministic(True)
            torch.backends.cudnn.benchmark = False
        else:
            log.warning('the random seed should be a non-negative integer')

    config.device = None
    if not config.cpu and torch.cuda.is_available():
        config.device = torch.device('cuda')
    else:
        config.device = torch.device('cpu')
        # https://github.com/pytorch/pytorch/issues/11201
        torch.multiprocessing.set_sharing_strategy('file_system')
    log.info(f'Using device: {config.device}')

    config.run_name = '{}_{}_{}'.format(
        config.data_name,
        Path(config.config).stem if config.config else config.model_name,
        datetime.now().strftime('%Y%m%d%H%M%S'),
    )
    log.info(f'Run name: {config.run_name}')
    return config
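
A hypothetical driver for init_env; it assumes only an argparse-style namespace carrying the attributes the function reads:

from argparse import Namespace

config = init_env(Namespace(seed=42, cpu=False, data_name='mnist',
                            config=None, model_name='baseline'))
print(config.device, config.run_name)
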
Example #6
def show(title, options, can_exit=True, main_menu=False, decorator='++'):
    """
    Display a menu

    options: dictionary in which each key is a string and each value is a tuple (string, function), representing
            the text of the function that will be called when the related string in inserted as input
        ex: { 'a', ('option a', print) } : print 'option a' and when 'a' is pressed, call the function 'print'
    """
    log.success('{} {} {}'.format(decorator, title, decorator))
    for s, f in options.items():
        log.warning('({}) {}'.format(s, f[0]))
    if can_exit:
        log.warning('(x) Exit')

    wrong_choice = True
    while wrong_choice:
        arg = input()
        print()

        try:
            if arg=='x' and can_exit:
                wrong_choice = False
                quit_menu(main_menu)
            else:
                funct = options[arg][1]
                wrong_choice = False
                res = funct()
                quit_menu(main_menu)
                return res
        except KeyError:
            log.error('Invalid option, retry:')
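
A hypothetical menu built against the contract above; list_files is a stand-in action:

def list_files():
    print('listing files...')

options = {
    'a': ('List files', list_files),
    'h': ('Say hello', lambda: print('hello')),
}
show('Main menu', options, can_exit=True, main_menu=True)
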
Example #8
def query_pair(origin, destination, n_days=366):
    """
    Query all flights between 2 airports

    Args:
        origin:         code for origin airport
        destination:    code for destination airport
        n_days:         max days of history
    """

    # Query the start day, then only the first day of each month
    start_day = date.today()

    dfs = []
    for x in range(n_days):
        query_day = start_day + timedelta(x)

        # Only do first day of month
        if (query_day.day != 1) and (query_day != start_day):
            log.trace(f"Skiping day '{query_day}'")
            continue

        response = query_flights(origin, destination, query_day)
        data = response.json()

        if data["Quotes"]:
            dfs.append(parse_data(data))

    if dfs:
        return pd.concat(dfs).reset_index(drop=True)
    else:
        log.warning(f"No flights from '{origin}' to '{destination}'")
Example #9
 def _make_task(self, task_info):
     if task_info.type in self.types:
         task_class = self.types[task_info.type]
         task_data = self.storage[task_info.id]
         return task_class(task_info, task_data, self.signal)
     else:
         log.warning('Cannot handle task type "{}"', task_info.type)
Example #10
 def set_head_token_idx(self, dep_graph):
     check_type(dep_graph, DependencyGraph)
     if self.head_token_idx != -1:
         log.warning('Overriding existing head_token_idx {}'.format(
             self.head_token_idx))
     self.head_token_idx = dep_graph.get_head_token_idx(
         self.start_token_idx, self.end_token_idx)
Example #11
def download(addon):
    # TODO: prevent multiple downloads.
    current = addon['current_version']
    for file_obj in current['files']:
        url = file_obj['url']

        directories = get_directories(addon['id'])
        target = '{}.xpi'.format(
            os.path.join(directories['files'], str(file_obj['id'])))

        if os.path.exists(target):
            log.info('{}: Skipping download'.format(addon['id']))
            continue

        res = requests.get(url)
        if res.status_code == 404:
            log.warning('{}: got a 404'.format(addon['id']))
            continue
        else:
            res.raise_for_status()

        with open(target, 'wb') as filehandle:
            for chunk in res.iter_content(10000):
                filehandle.write(chunk)

        log.info('{}: Downloaded file: {}'.format(addon['id'], file_obj['id']))
Example #12
 def inner(*args, **kwargs):
     start_time = time.time()
     ret = func(*args, **kwargs)
     log.warning('Thread: %s, parent process: %s, elapsed: %s, <Task (%s) finished!!!>' %
                 (threading.current_thread().getName(), os.getpid(),
                  time.time() - start_time, func.__name__))
     return ret
Example #13
def load_from_list_category(data, lang, need_check=True):
    """解析从 list<text> 模式的 xlsx 中读取的翻译

    Args:
        data: 从 xlsx 读出的数据,data[i][j] 表示第 i 行第 j 列的数据
        lang (str): "zh"/"en", 读中文还是英文
        need_check (bool): 是否检查

    Returns:
        category (str): category from lang_def
        translated_data (list[str]): list of [file_id, unknown, index, text]
    """

    # check
    if need_check:
        for row in data:
            if row[4] != '' and not check_string_with_origin(row[3], row[4]):
                log.warning('check string failed: %s', str(row[1]))

    # Drop extra columns, keeping only the internal id and the English/Chinese text
    if lang == 'en':
        data = [(row[1], row[3]) for row in data]
    else:
        data = [(row[1], row[4]) for row in data]
    category = data[0][0].rsplit('-', 3)[0]

    # Recover the numbering
    translated_data = []
    for intern_id, text in data:
        if intern_id != '' and text != '':
            file_id, unknown, index = [str(int(x)) for x in intern_id.rsplit('-', 3)[1:]]   # as strings, without leading zeros
            translated_data.append([file_id, unknown, index, text])

    return category, translated_data
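
How the internal id round-trips, shown on a hypothetical id value:

intern_id = 'quest-12-0-007'   # hypothetical internal id
file_id, unknown, index = [str(int(x)) for x in intern_id.rsplit('-', 3)[1:]]
assert (file_id, unknown, index) == ('12', '0', '7')   # leading zeros stripped
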
Example #14
    def build_predicates(self):
        assert len(self.all_instances) > 0
        assert self.treebank_reader is not None
        assert self.nombank_reader is not None
        assert self.predicate_mapping is not None
        assert self.corenlp_reader is not None

        if len(self.all_predicates) > 0:
            log.warning('Overriding existing predicates')
            self.all_predicates = []

        log.info('Building predicates')
        for instance in self.all_instances:
            predicate = Predicate.build(instance)
            predicate.set_pred(self.predicate_mapping[str(
                predicate.pred_pointer)])
            self.all_predicates.append(predicate)

        log.info('Checking explicit arguments with Nombank instances')
        for predicate in self.all_predicates:
            nombank_instance = self.nombank_reader.search_by_pointer(
                predicate.pred_pointer)
            predicate.check_exp_args(nombank_instance,
                                     add_missing_args=False,
                                     remove_conflict_imp_args=False,
                                     verbose=False)

        log.info('Parsing all implicit and explicit arguments')
        for predicate in self.all_predicates:
            predicate.parse_args(
                self.treebank_reader,
                self.corenlp_reader,
                include_non_head_entity=self.include_non_head_entity)
        log.info('Done')
Example #15
async def task_info(chat, cq, match, user_data):
    await cq.answer()
    task_name = match.group(1)
    if task_name not in user_data['tasks']:
        log.warning('Task name \'{}\' not in user tasks {}', task_name,
                    user_data['tasks'].keys())
        log.debug('User data: {}', user_data)
        return await error_message(chat, None, user_data)
    task = user_data['tasks'][task_name]
    info = '''
        Task info:
        Name: {}
        State: {}
        URL: {}
        Blacklist: {}
        Whitelist: {}
    '''.format(task.name, 'active' if task.state else 'inactive',
               task.args['url'], ', '.join(task.args['blacklist']),
               ', '.join(task.args['whitelist']))
    delete_inline = make_inline_keyboard(
        1, (Buttons.TASK_DELETE, Constants.TASK_DELETE_TMPL.format(task_name)),
        to_json=False)
    return await chat.edit_text(chat.message['message_id'],
                                info,
                                markup=delete_inline)
Example #16
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--prefix', type=str, default='default')
    parser.add_argument('--model',
                        type=str,
                        default='baseline',
                        choices=['baseline', 'rn',
                                 'film'])  # model architecture

    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--learning_rate', type=float, default=1e-4)
    parser.add_argument(
        '--lr_weight_decay', action='store_true', default=False
    )  # if True, learning rate is decayed with exponential scheduling

    parser.add_argument('--checkpoint', type=str, default=None)
    parser.add_argument('--dataset_path',
                        type=str,
                        default='datasets/SortOfCLEVR_4_200000_32')
    parser.add_argument('--image_size', type=int, default=32)

    config = parser.parse_args()
    train_dataset = SortOfCLEVR(config.dataset_path, split='train')
    val_dataset = SortOfCLEVR(config.dataset_path, split='val')
    trainer = Trainer(config, train_dataset, val_dataset)

    log.warning("dataset: %s, learning_rate: %f", config.dataset_path,
                config.learning_rate)
    trainer.train()
Example #17
    def push(self):
        """Push todo to gist.github.com"""
        github = Github()
        gist_id = GistId().get()
        token = GithubToken().get()

        github.login(token)

        if not self.todo.name:
            name = "Todo"
        else:
            name = self.todo.name

        files = {
            name: {
                "content": self.todo_content
            }
        }

        log.info(
            "Pushing '%s' to https://gist.github.com/%s .." % (name, gist_id)
        )
        response = github.edit_gist(gist_id, files=files)

        if response.status_code == 200:
            log.ok("Push succeeded.")
        elif response.status_code == 401:
            log.warning("Github token out of date, emptying the old token")
            GithubToken().save('')  # empty the token!
            self.push()  # and repush
        else:
            log.error("Push failed. %d" % response.status_code)
Example #18
    def upload(self, src, after_copy=None):
        log.info("Mounting the {} folder".format(self._dest))
        try:
            filename = path.basename(src)

            call(['fusermount', '-u', '-z', self._mount_point])

            if not path.isdir(self._mount_point):
                mkdir(self._mount_point)

            cmd = 'sshfs {} {}@{}:{} {} -o IdentityFile={},StrictHostKeyChecking=no'.format(
                "-p " + str(self._port), self._user, self._host, self._dest,
                self._mount_point, self._identity)

            print(cmd)
            c, _, __ = self._run_process(cmd.split(' '))

            if c != 0:
                raise RuntimeError('Can\'t mount remote FS')

            shutil.copy2(src, path.join(self._mount_point, filename))

            if callable(after_copy):
                after_copy(self)

        except Exception as e:
            log.warning(
                'Unable to copy file to remote location. Exception: {}'.format(
                    e))
            raise
        finally:
            self._run_process(['fusermount', '-u', '-z', self._mount_point])
Example #19
def prepare():
    log.info('prepare content')
    try:
        content.main(action='extract')
        create_sample_players()
    except Exception, e:
        log.warning('can not extract content: %s', e)
Example #20
    def multi_compress(cls, directory='', callback_list=[]):
        '''Compress multiple files concurrently (multi-threaded batch compression).
        :param directory: absolute path of the directory containing the files to compress.
            '/usr/media/'
        :param callback_list: names of the callback functions to run after each compression.
            ['func', ...]
        '''

        log.warning('Parent process: %s, thread: %s, <Task (%s) start...>' %
                    (os.getpid(), threading.current_thread().getName(),
                     sys._getframe().f_code.co_name))

        executor = BoundedExecutor(0, 4)

        directory = directory.strip()
        if not os.path.isdir(directory):
            log.error('%s is not a directory' % directory)
            return
        file_path_list = os.listdir(directory)
        log.info(sys._getframe().f_code.co_name, 'file_path_list',
                 file_path_list)

        for file_path in file_path_list:
            future = executor.submit(cls.compress,
                                     file_path=os.path.join(
                                         directory, file_path))
            log.info(sys._getframe().f_code.co_name, 'file_path', file_path,
                     future)
            for callback in callback_list:
                future.add_done_callback(getattr(cls, callback))
        executor.shutdown(wait=True)
Example #22
def main():
    if sys.argv[1] == '1':
        update_translation()
    elif sys.argv[1] == '2':
        merge_translation()
    else:
        log.warning('unknown args')
        sys.exit(-2)
Example #23
 def test_invalid_response_received(self):
     # Response received is invalid
     self.ping_r_in._msg_dict[message.RESPONSE] = 'zz'
     ok_(not self.got_response) 
     log.warning(
         "**IGNORE WARNING LOG**")
     self.query.on_response_received(self.ping_r_in)
     ok_(not self.got_response)
Example #24
    def evaluate(self, recommendations, test_urm, at_k=10, single_ap=False, verbose=True):
        """
        Return the MAP@k evaluation for the provided recommendations
        computed with respect to the test_urm

        Parameters
        ----------
        recommendations : list
            List of recommendations, where a recommendation
            is a list (of length N+1) of playlist_id and N items_id:
                [   [7,   18,11,76, ...] ,
                    [13,  65,83,32, ...] ,
                    [25,  30,49,65, ...] , ... ]
        test_urm : csr_matrix
            A sparse matrix
        at_k : int, optional
            The number of items to compute the precision at
        single_ap: bool, optional
            If True, return also the array of AP for each user

        Returns
        -------
        MAP@k: (float) MAP for the provided recommendations
        """

        if at_k <= 0:
            log.error('Invalid value of k {}'.format(at_k))
            return

        start = time.time()
        aps = 0.0
        ap_array = []
        for r in recommendations:
            row = test_urm.getrow(r[0]).indices
            m = min(at_k, len(row))

            ap = 0.0
            n_elems_found = 0.0
            for j in range(1, m+1):
                if r[j] in row:
                    n_elems_found += 1
                    ap = ap + n_elems_found/j
            if m > 0:
                ap = ap/m
                aps += ap
            if single_ap:
                ap_array.append(ap)

        result = aps/len(recommendations)
        print('MAP computed in {:.2f} s'.format(time.time() - start))
        if verbose:
            log.warning('MAP: {}'.format(result))
        
        if single_ap:
            return result, ap_array
        else:
            return result
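
A tiny worked check of the metric, assuming numpy/scipy are available and ev is an instance of the class above:

import numpy as np
from scipy.sparse import csr_matrix

test_urm = csr_matrix(np.array([[0, 1, 1, 0]]))  # playlist 0 owns items 1 and 2
recs = [[0, 1, 3, 2]]                            # playlist_id followed by ranked items
# m = min(at_k, 2) = 2, so only the first two slots are scored:
# hit at rank 1, miss at rank 2 -> AP = (1/1) / 2 = 0.5
result = ev.evaluate(recs, test_urm, at_k=10, verbose=False)
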
Example #25
 async def recv(self):
     async for data in self.socket:
         msg = Message.load(data)
         if msg is not None:
             # log.debug(f'Received: {msg} from {self}')
             await self.dispatcher.dispatch(msg, self)
         else:
             log.warning(f'Bad message:\n{data}')
             self.rank -= 1
Example #26
    def run(self, target=None, tid=None, pid=None):
        if target is None:
            log.critical("Please set --target param")
            sys.exit()
        if tid is not None:
            task_id = tid
            # Start Time For Task
            t = CobraTaskInfo.query.filter_by(id=tid).first()
            if t is None:
                log.critical("Task id doesn't exist.")
                sys.exit()
            if t.status not in [0, 1]:
                log.critical("Task already scanned.")
                sys.exit()
            t.status = 1
            t.time_start = int(time.time())
            t.updated_at = time.strftime('%Y-%m-%d %X', time.localtime())
            try:
                db.session.add(t)
                db.session.commit()
            except Exception as e:
                log.error("Set start time failed: " + str(e))
        else:
            task_id = None

        target_type = self.parse_target(target)
        if target_type is False:
            log.error("""
                Git Repository: must end with .git
                SVN Repository: can be http:// or https://
                Directory: must be a local directory
                File: must be a single file or a tar.gz/zip/rar compressed file
                """)
        from engine import static
        s = static.Static(target, task_id=task_id, project_id=pid)
        if target_type == 'directory':
            s.analyse()
        elif target_type == 'compress':
            from utils.decompress import Decompress
            # Load a compressed file. Only tar.gz, rar and zip are supported.
            dc = Decompress(target)
            # Decompress it. This will create a directory named "222_test.tar".
            dc.decompress()
            s.analyse()
        elif target_type == 'file':
            s.analyse()
        elif target_type == 'git':
            from pickup.GitTools import Git
            g = Git(target, branch='master')
            g.get_repo()
            if g.clone() is True:
                s.analyse()
            else:
                log.critical("Git clone failed")
        elif target_type == 'svn':
            log.warning("SVN repositories are not supported")
Example #28
def main():
    if sys.argv[1] == '1':
        gen_chs()
    elif sys.argv[1] == '2':
        gen_cht()
    elif sys.argv[1] == '3':
        gen_chs_force()
    else:
        log.warning('unknown args')
        sys.exit(-2)
Example #29
 async def parse(self, url):
     host = detect_host(url)
     handler = getattr(self, 'parse_' + host, None)
     if not handler:
         log.warning('Unsupported URL ({}): {}', host, url)
         return
     async with aiohttp.ClientSession() as session:
         async with session.get(url) as page:
             document = html.fromstring(await page.read())
     return handler(document)
Example #31
 def rep_mention(self, rep_mention):
     check_type(rep_mention, Mention)
     if self._rep_mention is not None:
         if self._rep_mention.has_same_span(rep_mention):
             return
         else:
             log.warning('Overriding existing rep_mention ({})'.format(
                 self._rep_mention))
             self._rep_mention.rep = False
     self._rep_mention = rep_mention
Example #32
 def get_parent(self, token_idx, msg_prefix=''):
     parent = self.lookup('mod', token_idx, include_extra=False)
     if len(parent) == 0:
         return 'root', -1
     if len(parent) > 1 or len(parent.items()[0][1]) > 1:
         log.warning(
             '{}: In sentence #{}, token #{} has more than 1 non-extra '
             'head token: {}'.format(msg_prefix, self._sent_idx, token_idx,
                                     parent))
     return parent.items()[0][0], parent.items()[0][1][0]
Example #33
def main():
    lang = 'en'

    # getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:')
    except getopt.GetoptError as e:
        log.error(e)
        sys.exit(2)
    for o, a in opts:
        if o == '-l':
            lang = a

    cd = sys.path[0]
    translation_path = os.path.join(cd, '../translation/lang')
    dest_path = translation_path

    # load translation
    lines_grouped_by_id = {}    # keyed by ID; each value is a list of the lines from the original file with that ID
    translate_file = os.path.join(translation_path, '%s.lang.csv' % lang)

    with open(translate_file, 'rt', encoding='utf-8') as fp:
        fp.readline()
        lines = fp.readlines()

    # split
    for line in lines:
        _id = line.split(',', 1)[0]
        _id = _id[1:-1]     # strip the surrounding double quotes
        if _id not in lines_grouped_by_id.keys():
            lines_grouped_by_id[_id] = []
        lines_grouped_by_id[_id].append(line)

    for _id, lines_with_same_id in sorted(lines_grouped_by_id.items()):
        target_file = os.path.join(dest_path, '%s.%s.lang.csv' % (lang, _id))
        log.debug('split to file %s' % target_file)
        with open(target_file, 'wt', encoding='utf-8') as fp:
            fp.writelines(lines_with_same_id)

    # known id
    known_id = set()
    for values in (file_id_of_pair.values(), file_id_of_list.values(), file_id_of_array.values()):
        for id_tuple in values:
            for _id in id_tuple:
                known_id.add(_id)
    known_id = known_id | ignored_file_id

    # file list
    target_file = os.path.join(dest_path, '%s.lang.split.txt' % lang)
    with open(target_file, 'wt', encoding='utf-8') as fp:
        id_list = sorted([int(_id) for _id in lines_grouped_by_id.keys()])
        for _id in id_list:
            fp.write('%d\n' % _id)
            if str(_id) not in known_id:
                log.warning('warning: unknown id %d.' % _id)
Example #34
 def map_keys(self, f, merge, warn=False):
     y = {}
     for k, v in self.items.items():
         z = f(k)
         if z in y:
             if warn:
                 warning("Duplicate key: {}".format(z))
             y[z] = merge(y[z], v)  # merge these 2 values
         else:
             y[z] = v
     return DictionaryCollection(self, y)
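
A hypothetical use, assuming coll is a DictionaryCollection whose items map strings to counts; case-insensitive duplicate keys are folded together by summing:

merged = coll.map_keys(lambda k: k.lower(), merge=lambda a, b: a + b, warn=True)
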
Example #35
    def get_face(self,
                 filename,
                 landmarks=None,
                 size=(cfg.CROP_SIZE, cfg.CROP_SIZE),
                 use_cache=True,
                 from_sequence=False):
        # landmarks = np.zeros((68, 2), dtype=np.float32)
        # pose = np.zeros(3, dtype=np.float32)
        crop_filepath = os.path.join(self.cropped_img_dir, filename + '.jpg')

        if use_cache and os.path.isfile(crop_filepath):
            try:
                crop = io.imread(crop_filepath)
            except OSError:
                os.remove(crop_filepath)
                return self.get_face(filename, landmarks, size, use_cache,
                                     from_sequence)
            if crop.shape[:2] != size:
                crop = cv2.resize(crop, size, interpolation=cv2.INTER_CUBIC)
            if landmarks is None:
                of_conf, landmarks, _ = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename),
                    numpy_lmFilepath=os.path.join(self.npfeature_dir,
                                                  filename))
            landmarks = face_processing.scale_landmarks_to_crop(
                landmarks, output_size=size)
        else:
            # Load image from dataset
            img_path = os.path.join(self.fullsize_img_dir, filename + '.jpg')
            img = io.imread(img_path)
            if img is None:
                raise IOError(
                    "\tError: Could not load image {}!".format(img_path))

            # load landmarks extracted with OpenFace2
            if landmarks is None:
                of_conf, landmarks, _ = ds_utils.read_openface_detection(
                    os.path.join(self.feature_dir, filename),
                    numpy_lmFilepath=os.path.join(self.npfeature_dir,
                                                  filename),
                    from_sequence=from_sequence)
                if of_conf <= 0.0:
                    log.warning("No landmarks for image {}".format(filename))

            # crop, landmarks = face_processing.crop_bump(img, landmarks, output_size=size)
            crop, landmarks = face_processing.crop_celebHQ(img,
                                                           landmarks,
                                                           output_size=size)

            if use_cache:
                utils.io.makedirs(crop_filepath)
                io.imsave(crop_filepath, crop)

        return crop, landmarks
Example #36
    def run(self,
            num_factors,
            urm_train=None,
            urm=None,
            urm_test=None,
            targetids=None,
            with_scores=False,
            export=True,
            verbose=True):
        """
        Run the model and export the results to a file

        Parameters
        ----------
        num_factors : int, number of latent factors
        urm : csr matrix, URM. If None, data.get_urm_train() is used. This should be the
            entire URM, whose row indexes the targetids refer to.
        urm_test : csr matrix, urm on which to test the model. If None, data.get_urm_test() is used
        targetids : list, target user ids. If None, data.get_target_playlists() is used

        Returns
        -------
        recs: (list) recommendations
        map10: (float) MAP10 for the provided recommendations
        """
        _urm = data.get_urm_train()
        _icm = data.get_icm()
        _urm_test = data.get_urm_test()
        _targetids = data.get_target_playlists()
        #_targetids = data.get_all_playlists()

        start = time.time()

        urm_train = _urm if urm_train is None else urm_train
        #urm = _urm if urm is None else urm
        urm_test = _urm_test if urm_test is None else urm_test
        targetids = _targetids if targetids is None else targetids

        self.fit(urm_train=urm_train, num_factors=num_factors)
        recs = self.recommend_batch(userids=targetids)

        map10 = None
        if len(recs) > 0:
            map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
        else:
            log.warning('No recommendations available, skip evaluation')

        if export:
            exportcsv(recs, path='submission', name=self.name, verbose=verbose)

        if verbose:
            log.info('Run in: {:.2f}s'.format(time.time() - start))

        return recs, map10
Example #37
 def stop_server(_=None):
     try:
         with open('/tmp/infnote_chain.pid', 'r') as file:
             pid = int(file.readline())
             os.kill(pid, signal.SIGTERM)
             log.info(f'Killed server by PID {pid}')
     except FileNotFoundError:
         log.warning(
             'PID file does not exist. The process may not have started up correctly.')
     except ProcessLookupError:
         log.warning(f'No such process PID {pid}.')
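
For context, a minimal sketch of the matching startup side, assuming the server records its own PID at the same path:

import os

def write_pid_file(path='/tmp/infnote_chain.pid'):
    with open(path, 'w') as file:
        file.write(str(os.getpid()))
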
Example #38
 def do_verify(self, trans_receipt, timeout=10):
     url = INAPP_PURCHASE_VERIFY_URL
     resp = self._send_verify(url, trans_receipt, timeout)
     if 21007 == resp.get("status"):
          # the player used a sandbox account
         log_data = {"player_id": self.player_id,
                     "resp": resp,
                     "receipt": trans_receipt}
         log.warning("IAP - Player use an sendbox account: %s"
                     % str(log_data))
         resp = self._send_verify(INAPP_PURCHASE_SANDBOX_VERIFY_URL,
                                  trans_receipt, timeout)
     return resp
Example #39
 def get_with_retry(self, key):
     """
     Like GConnect.get() but retries on temporary errors.
     """
     while True:
         try:
             val = self.get(key)
             return val
         except TemporaryFailError, e:
             log.warning("TemporaryFailError: can't get key '%s' - "
                         "retry in 2 secs...", key)
         time.sleep(2)
         continue
Example #40
def _uncompact_nodes2(c_nodes):
    nodes = []
    for c_node in c_nodes:
        node_id = Id(c_node[:ID_SIZE_BYTES])
        try:
            node_addr = uncompact_addr(c_node[ID_SIZE_BYTES:])
        except AddrError:
            log.warning('IPv6 addr in nodes2: %s' % c_node)
        else:
            node = Node(node_addr, node_id)
            nodes.append(node)
    return nodes
Example #41
 def test_block_flood(self):
     from floodbarrier import MAX_PACKETS_PER_PERIOD as FLOOD_LIMIT
     for _ in xrange(FLOOD_LIMIT):
         self.client_r.sendto(DATA, tc.SERVER_ADDR)
     for _ in xrange(10):
         self.client_r.sendto(DATA, tc.SERVER_ADDR)
         log.warning(
             "TESTING LOGS ** IGNORE EXPECTED WARNING **")
     time.sleep(tc.TASK_INTERVAL)
     with self.lock:
         log.debug('datagram processed: %d/%d' % (
                           len(self.datagrams_received),
                           FLOOD_LIMIT))
         assert len(self.datagrams_received) <= FLOOD_LIMIT
Example #42
 def test_fire_callback_on_late_response(self):
     self.query.timeout_task.fire_callbacks()
     self.query.timeout_task.cancel()
     # the server creates the response
     pong_msg = message.OutgoingPingResponse(tc.SERVER_ID)
     pong_data = pong_msg.encode(tc.TID)
     # rpc_m decodes the response received
     pong_msg = message.IncomingMsg(pong_data)
     # querier notifies of the message (but it's too late)
     self.query.on_response_received(pong_msg)
     log.warning(
         "**IGNORE WARNING LOG**")
     assert not self.got_response and not self.got_error \
            and self.got_timeout
Example #43
    def apply_one_translate(self, name, origin, translation, need_check=True):
        """应用一条翻译

        先检查原文是否一致。不检查更新。

        Args:
            name (str): 名字, SI_ 开头
            origin (str): 原文
            translation (str): 译文
            need_check (bool): 是否检查
        """
        if name in self.ui_lines.keys() and origin == self.ui_lines[name].origin:
            if need_check and not check_string_with_origin(translation, origin):
                log.warning('check string failed: %s', name)
            self.ui_lines[name].set_translation(translation)
Example #44
def config_pytorch(options):
    """Config pytorch packages.

    Fix random number for packages and initialize distributed environment for pytorch.
    Setup cuda environment for pytorch.

    :param options: A global object containing specified options.
    :type options: argparse.Namespace
    """

    # Setting `cudnn.deterministic = True` will turn on
    # CUDNN deterministic setting which can slow down training considerably.
    # Unexpected behavior may also be observed from checkpoint.
    # See: https://github.com/pytorch/examples/blob/master/imagenet/main.py
    if options.cudnn_deterministic:
        cudnn.deterministic = True
        log.warning('You have chosen to seed training. '
                    'This will turn on the CUDNN deterministic setting, '
                    'which can slow down your training considerably! '
                    'You may see unexpected behavior when restarting '
                    'from checkpoints.', 0)

    if options.seed is not None:
        random.seed(options.seed)
        torch.manual_seed(options.seed)

    # define the graph for the computation.
    if options.use_cuda:
        assert torch.cuda.is_available()

    options.rank = dist.get_rank()
    options.world_size = dist.get_world_size()
    options.graph = FCGraph(options)

    # enable cudnn accelerator if we are using cuda.
    if options.use_cuda:
        options.graph.assigned_gpu_id()
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True

        if torch.backends.cudnn.version() is None:
            log.warning("CUDNN not found on device.")

        log.info("World size={}, Rank={}, hostname={}, cuda_available={}, cuda_device={}".format(
            options.world_size, options.rank, socket.gethostname(), torch.cuda.is_available(),
            torch.cuda.current_device()))
Example #45
 def on_response_received(self, response_msg):
     try:
         response_msg.sanitize_response(self.query)
     except (message.MsgError):
         log.exception(
             "We don't like dirty reponses: %r|nresponse ignored"
             % response_msg)
         return # Response ignored 
     self.node.is_ns = response_msg.ns_node
     if self.node.id:
         if response_msg.sender_id != self.node.id:
             return # Ignore response
     else:
         self.node.id = response_msg.sender_id
     #TODO2: think whether late responses should be accepted
     if self.timeout_task.cancelled:
         log.warning(
             "Response recevived but it's too late!!\n%r, %r" %
             (response_msg,
             self.timeout_task))
         return # Ignore response
     self.timeout_task.cancel()
     nodes = []
     try:
         nodes.extend(response_msg.nodes)
     except (AttributeError):
         pass
     try:
         nodes.extend(response_msg.nodes2)
     except (AttributeError):
         pass
     # Notify routing manager (if nodes found).
     # Do not notify when the query was a GET_PEERS because
     # the lookup is in progress and the routing_m shouldn't
     # generate extra traffic.
     if self.query == message.FIND_NODE and \
             nodes and self.notify_routing_m_on_nodes_found_f:
         self.notify_routing_m_on_nodes_found_f(nodes)
     # Notify routing manager (response)
     self.node.is_ns = response_msg.ns_node
     if self.notify_routing_m_on_response_f:
         self.notify_routing_m_on_response_f(self.node)
     # Do callback to whomever did the query
     self.on_response_f(response_msg, self.node)
     return True # the response was fine
Example #46
def load_from_pair_category(data, lang, need_check=True):
    """解析从 <name, desc> 模式的 xlsx 中读取的翻译

    Args:
        data: 从 xlsx 读出的数据,data[i][j] 表示第 i 行第 j 列的数据
        lang (str): "zh"/"en", 读中文还是英文
        need_check (bool): 是否检查

    Returns:
        category (str): category from lang_def
        translated_data (list[str]): list of [file_id, unknown, index, text]
    """

    # check
    if need_check:
        for row in data:
            if (row[4] != '' and not check_string_with_origin(row[3], row[4])) \
                    or (row[7] != '' and not check_string_with_origin(row[6], row[7])):
                log.warning('check string failed: %s', str(row[1]))

    # Drop extra columns, keeping only the internal id, the English/Chinese name and the English/Chinese description
    if lang == 'en':
        data = [(row[1], row[3], row[6]) for row in data]
    else:
        data = [(row[1], row[4], row[7]) for row in data]

    category = data[0][0].rsplit('-', 1)[0]
    name_file_id, desc_file_id = file_id_of_pair[category]

    # Recover the numbering
    translated_data = []
    for intern_id, name, desc in data:
        if intern_id == '':
            continue        # blank row
        index = intern_id.rsplit('-', 1)[-1]
        index = str(int(index))     # strip leading zeros
        # Set unknown to '0' directly; no exceptions have been found in this kind of data so far
        unknown = '0'
        if name != '':
            translated_data.append([name_file_id, unknown, index, name])
        if desc != '':
            translated_data.append([desc_file_id, unknown, index, desc])

    return category, translated_data
Example #47
def init_cb():
    global cb
    bucket = settings.COUCHBASE_BUCKET
    host = settings.COUCHBASE_HOST
    log.debug("connecting to bucket '%s' on host '%s'", bucket, host)
    if bucket and host:
        while True:
            try:
                cb = GConnectionExtention(bucket=bucket, host=host, quiet=True)
                log.debug("connected")
                break
            except NetworkError, e:
                log.warning("NetworkError: can't connect to host '%s' - "
                            "retry in 5 secs...", host)
            except BucketNotFoundError, e:
                log.warning("BucketNotFoundError: can't find bucket '%s' "
                            "on host '%s' - retry in 5 secs...", bucket, host)
            time.sleep(5)
            continue
Example #48
 def test_listen_upd(self):
     r = ThreadedReactor()
     r.start()
     log.warning(''.join(
         ('TESTING LOGS ** IGNORE EXPECTED WARNING ** ',
          '(udp_listen has not been called)')))
     self.client_r.sendto(DATA, tc.SERVER_ADDR)
     while 1: #waiting for data
         with self.lock:
             if self.datagrams_received:
                 break
         time.sleep(tc.TASK_INTERVAL)
     with self.lock:
         first_datagram = self.datagrams_received.pop(0)
         log.debug('first_datagram: %s, %s' % (
                 first_datagram,
                 (DATA, tc.CLIENT_ADDR)))
         assert first_datagram, (DATA, tc.CLIENT_ADDR)
     r.stop()
Example #49
    def run(self):
        """Main loop activated by calling self.start()"""
        
        last_task_run = time.time()
        stop_flag = self.stop_flag
        while not stop_flag:
            timeout_raised = False
            try:
                data, addr = self.s.recvfrom(BUFFER_SIZE)
            except (AttributeError):
                log.warning('udp_listen has not been called')
                time.sleep(self.task_interval)
                #TODO2: try using Event and wait
                timeout_raised = True
            except (socket.timeout):
                timeout_raised = True
            except (socket.error), e:
                log.critical(
                    'Got socket.error when receiving (more info follows)')
                log.exception('See critical log above')
            else:
                ip_is_blocked = self.floodbarrier_active and \
                                self.floodbarrier.ip_blocked(addr[0])
                if ip_is_blocked:
                    log.warning('%s blocked' % `addr`)
                else:
                    self.datagram_received_f(data, addr)

            if timeout_raised or \
                   time.time() - last_task_run > self.task_interval:
                #with self._lock:
                self._lock.acquire()
                try:
                    while True:
                        task = self.tasks.consume_task()
                        if task is None:
                            break
                        task.fire_callbacks()
                    stop_flag = self.stop_flag
                finally:
                    self._lock.release()
Example #50
 def on_response_received(self, response_msg, addr):
     # TYPE and TID already sanitized by rpc_manager
     log.debug('response received: %s' % repr(response_msg))
     try:
         addr_query_list = self.pending[addr]
     except (KeyError):
         log.warning('No pending queries for %s', addr)
         return # Ignore response
      # There are pending queries from this node; let's find the right one (TID)
     query_found = False
     for query_index, query in enumerate(addr_query_list):
         log.debug('response node: %s, query:\n(%s, %s)' % (
             `addr`,
             `query.tid`,
             `query.query`))
         if query.matching_tid(response_msg.tid):
             query_found = True
             break
     if not query_found:
         log.warning('No query for this response\n%s\nsource: %s' % (
             response_msg, addr))
         return # ignore response 
     # This response matches query. Trigger query's callback
     response_is_ok = query.on_response_received(response_msg)
     if response_is_ok:
         # Remove this query from pending
         if len(addr_query_list) == 1:
             # There is one item in the list. Remove the whole list.
             del self.pending[addr]
         else:
             del addr_query_list[query_index]
     else:
         log.warning('Bad response from %r\n%r' % (addr,
                                                       response_msg))
Example #51
def get_csv_from_xls(translation_path, lang):
    """从文件夹中读取所有翻译文件

    Args:
        translation_path (str): 存放翻译 xlsx 文件的路径或文件
        lang (str): "zh"/"en", 读中文还是英文

    Returns:
        csv_list (dict[str: list]): dict<str, list>, 根据 category 归类的翻译
        ui_xls_file: UI翻译文件,如果找到了
    """
    file_list = []
    if os.path.isfile(translation_path):
        file_list.append((translation_path, translation_path))
    else:
        for dir_path, dir_names, file_names in os.walk(translation_path):
            for file_name in file_names:
                if file_name.lower().endswith('.xlsx') and not file_name.startswith('~') \
                        and (file_name.startswith('en.') or lang == 'zh'):
                    file_path = os.path.join(dir_path, file_name)
                    file_list.append((file_name, file_path))

    category_to_translated = {}
    ui_xls_file = None
    for file_name, file_path in file_list:
        # load from one file
        log.info('load from %s' % file_name)
        category, translated_data = load_from_langxls(file_path, lang, need_check=False, load_ui=True)
        log.info('load %d %ss' % (len(translated_data), category))
        if category in category_to_translated:
            log.warning('warning: override category %s' % category)
        category_to_translated[category] = translated_data
        if category == 'UI':
            ui_xls_file = file_path

    list_list = [line for _, translated_data in sorted(category_to_translated.items())
                 for line in translated_data]
    csv_list = ['"%s","%s","%s","0","%s"\n' % (line[0], line[1], line[2], line[3])
                for line in list_list]
    return csv_list, ui_xls_file
Example #52
def load_from_ui_fake(data, lang, need_check=True):
    """解析从UI模式的 xlsx 中读取的翻译

    Args:
        data: 从 xlsx 读出的数据,data[i][j] 表示第 i 行第 j 列的数据
        lang (str): "zh"/"en", 读中文还是英文
        need_check (bool): 是否检查

    Returns:
        category (str): "UI"
        translated_data (list[str]): list of [file_id, unknown, index, text]
    """

    # check
    if need_check:
        for row in data:
            if (row[2] != '' and not check_string_with_origin(row[3], row[4])) \
                    or (row[3] != '' and not check_string_with_origin(row[6], row[7])):
                log.warning('check string failed: %s', str(row[1]))

    # Drop extra columns, keeping only the name and the source/translated text
    if lang == 'en':
        data = [(row[1], row[2]) for row in data]
    else:
        data = [(row[1], row[3]) for row in data]

    category = 'UI'

    # Recover the numbering
    translated_data = []
    for intern_id, text in data:
        if intern_id == '':
            continue        # blank row
        # file_id=='UI', unknown=='0', index==intern_id
        index = intern_id
        unknown = '0'
        translated_data.append([category, unknown, index, text])

    return category, translated_data
Example #53
    def test_tools(self):
        bin_strs = ["23", "\1\5", "a\3"]
        for bs in bin_strs:
            i = bin_to_int(bs)
            bs2 = int_to_bin(i)
            log.debug("bs: %s, bin_to_int(bs): %d, bs2: %s" % (bs, i, bs2))
            assert bs == bs2

        ips = ["127.0.0.1", "222.222.222.222", "1.2.3.4"]
        ports = [12345, 99, 54321]
        for addr in zip(ips, ports):
            c_addr = compact_addr(addr)
            addr2 = uncompact_addr(c_addr)
            assert addr == addr2

            c_peers = message._compact_peers(tc.PEERS)
            peers = message._uncompact_peers(c_peers)
            for p1, p2 in zip(tc.PEERS, peers):
                assert p1[0] == p2[0]
                assert p1[1] == p2[1]

            c_nodes = message._compact_nodes(tc.NODES)
            nodes = message._uncompact_nodes(c_nodes)
            for n1, n2 in zip(tc.NODES, nodes):
                assert n1 == n2

        bin_ipv6s = ["\x00" * 10 + "\xff\xff" + "\1\2\3\4", "\x22" * 16]
        assert bin_to_ip(bin_ipv6s[0]) == "1.2.3.4"
        assert_raises(AddrError, bin_to_ip, bin_ipv6s[1])

        PORT = 7777
        BIN_PORT = int_to_bin(PORT)
        c_nodes2 = [tc.CLIENT_ID.bin_id + ip + BIN_PORT for ip in bin_ipv6s]
        nodes2 = [node.Node(("1.2.3.4", PORT), tc.CLIENT_ID)]
        log.debug(message._uncompact_nodes2(c_nodes2))
        assert message._uncompact_nodes2(c_nodes2) == nodes2
        log.warning("**IGNORE WARNING LOG** This exception was raised by a test")
Example #54
 def sanitize_response(self, query):
     self._sanitize_common()
     # sender_id
     self.sender_id = self._get_id(RESPONSE, ID)
     if query in [FIND_NODE, GET_PEERS]:
         # nodes
         nodes_found = False
         c_nodes = self._get_str(RESPONSE, NODES, optional=True)
         if c_nodes:
             self.nodes = _uncompact_nodes(c_nodes)
             nodes_found = True
         # nodes2
         try:
             self.nodes2 = _uncompact_nodes2(
                 self._msg_dict[RESPONSE][NODES2])
             if nodes_found:
                 log.info('Both nodes and nodes2 found')
             nodes_found = True
         except (KeyError):
             pass
     if query == FIND_NODE:
         if not nodes_found:
             log.warning('No nodes in find_node response')
             raise MsgError, 'No nodes in find_node response'
     elif query == GET_PEERS:
         # peers
         try:
             self.peers = _uncompact_peers(
                 self._msg_dict[RESPONSE][VALUES])
             if nodes_found:
                 log.warning(
                     'Nodes and peers found in get_peers response')
         except (KeyError):
             if not nodes_found:
                 log.warning(
                     'No nodes or peers found in get_peers response')
                 raise (MsgError,
                        'No nodes or peers found in get_peers response')
         # token
         self.token = self._get_str(RESPONSE, TOKEN)
Example #55
def check_xls(src_path, column_id, origin_column_id):
    """检查xls

    Args:
        src_path (str): 待检查的 xlsx 文件的路径
        column_id (int): 翻译后的列的 id
        origin_column_id (int): 原文的列的 id
    """
    data = load_xls(src_path)
    for line in data:
        text_is_ok = False
        try:
            text_to_check = line[column_id]
            # skip empty line
            if text_to_check == '':
                continue
            text_is_ok = check_string(text_to_check)
            if origin_column_id is not None:
                text_is_ok &= check_string_with_origin(line[column_id], line[origin_column_id])
        except Exception as e:
            log.warning(line)
            log.warning(e)
        if not text_is_ok:
            log.warning('Failed when checking:\n%s\n' % ', '.join(line))
Example #56
    def analyse(self):
        if self.directory is None:
            log.critical("Please set directory")
            sys.exit()
        log.info('Start static code analysis...')

        d = directory.Directory(self.directory)
        files = d.collect_files(self.task_id)
        log.info('Scan Files: {0}, Total Time: {1}s'.format(files['file_nums'], files['collect_time']))

        ext_language = {
            # Image
            '.jpg': 'image',
            '.png': 'image',
            '.bmp': 'image',
            '.gif': 'image',
            '.ico': 'image',
            '.cur': 'image',
            # Font
            '.eot': 'font',
            '.otf': 'font',
            '.svg': 'font',
            '.ttf': 'font',
            '.woff': 'font',
            # CSS
            '.css': 'css',
            '.less': 'css',
            '.scss': 'css',
            '.styl': 'css',
            # Media
            '.mp3': 'media',
            '.swf': 'media',
            # Execute
            '.exe': 'execute',
            '.sh': 'execute',
            '.dll': 'execute',
            '.so': 'execute',
            '.bat': 'execute',
            '.pl': 'execute',
            # Edit
            '.swp': 'tmp',
            # Cert
            '.crt': 'cert',
            # Text
            '.txt': 'text',
            '.csv': 'text',
            '.md': 'markdown',
            # Backup
            '.zip': 'backup',
            '.bak': 'backup',
            '.tar': 'backup',
            '.rar': 'backup',
            '.tar.gz': 'backup',
            '.db': 'backup',
            # Config
            '.xml': 'config',
            '.yml': 'config',
            '.spf': 'config',
            '.iml': 'config',
            '.manifest': 'config',
            # Source
            '.psd': 'source',
            '.as': 'source',
            # Log
            '.log': 'log',
            # Template
            '.template': 'template',
            '.tpl': 'template',
        }
        for ext in files:
            if ext in ext_language:
                log.info('{0} - {1}'.format(ext, files[ext]))
            else:
                log.info(ext)

        languages = CobraLanguages.query.all()

        rules = CobraRules.query.filter_by(status=1).all()
        extensions = None
        # `grep` (`ggrep` on Mac)
        grep = '/bin/grep'
        # `find` (`gfind` on Mac)
        find = '/bin/find'
        if 'darwin' == sys.platform:
            ggrep = ''
            gfind = ''
            for root, dir_names, file_names in os.walk('/usr/local/Cellar/grep'):
                for filename in file_names:
                    if 'ggrep' == filename:
                        ggrep = os.path.join(root, filename)
            for root, dir_names, file_names in os.walk('/usr/local/Cellar/findutils'):
                for filename in file_names:
                    if 'gfind' == filename:
                        gfind = os.path.join(root, filename)
            if ggrep == '':
                log.critical("brew install ggrep pleases!")
                sys.exit(0)
            else:
                grep = ggrep
            if gfind == '':
                log.critical("brew install findutils pleases!")
                sys.exit(0)
            else:
                find = gfind

        for rule in rules:
            log.info('Scan rule id: {0} {1} {2}'.format(self.project_id, rule.id, rule.description))
            # Filters
            # reset per rule so a rule whose language is missing is caught below
            extensions = None
            for language in languages:
                if language.id == rule.language:
                    extensions = language.extensions.split('|')
            if extensions is None:
                log.critical("Rule Language Error")
                sys.exit(0)

            # White list
            white_list = []
            ws = CobraWhiteList.query.filter_by(project_id=self.project_id, rule_id=rule.id, status=1).all()
            if ws is not None:
                for w in ws:
                    white_list.append(w.path)

            try:
                if rule.regex.strip() == "":
                    filters = []
                    for index, e in enumerate(extensions):
                        # join the -name tests with -o (logical OR)
                        if index > 0:
                            filters.append('-o')
                        filters.append('-name')
                        filters.append('*' + e)
                    # Find Special Ext Files
                    param = [find, self.directory, "-type", "f"] + filters
                else:
                    filters = []
                    for e in extensions:
                        filters.append('--include=*' + e)

                    # Exclude VCS directories
                    filters.append('--exclude-dir=.svn')
                    filters.append('--exclude-dir=.cvs')
                    filters.append('--exclude-dir=.hg')
                    filters.append('--exclude-dir=.git')
                    filters.append('--exclude-dir=.bzr')
                    filters.append('--exclude=*.svn-base')
                    # -n Show Line number / -r Recursive / -P Perl regular expression
                    param = [grep, "-n", "-r", "-P"] + filters + [rule.regex, self.directory]

                # log.info(' '.join(param))
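                # For illustration with a hypothetical rule (extensions
                # ['.php', '.inc'], regex 'eval\('), the grep branch builds:
                #   ggrep -n -r -P --include=*.php --include=*.inc
                #       --exclude-dir=.svn ... 'eval\(' <self.directory>
                # and the empty-regex branch builds:
                #   gfind <self.directory> -type f -name *.php -o -name *.inc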
                p = subprocess.Popen(param, stdout=subprocess.PIPE)
                result = p.communicate()

                # Exists result
                if len(result[0]):
                    lines = str(result[0]).split("\n")
                    for line in lines:
                        line = line.strip()
                        if line == '':
                            continue
                        if rule.regex.strip() == '':
                            # Find
                            file_path = line.strip().replace(self.directory, '')
                            log.debug('File: {0}'.format(file_path))
                            vul = CobraResults(self.task_id, rule.id, file_path, 0, '')
                            db.session.add(vul)
                        else:
                            # Grep
                            line_split = line.replace(self.directory, '').split(':', 2)
                            file_path = line_split[0].strip()
                            line_number = line_split[1].strip()
                            code_content = line_split[2].strip()

                            if file_path in white_list or ".min.js" in file_path:
                                log.info("In white list or min.js")
                            else:
                                # Annotation
                                # # // /* *
                                match_result = re.match(r"(#)?(//)?(\*)?(/\*)?", code_content)
                                if match_result.group(0) != "":
                                    log.info("In Annotation")
                                else:
                                    log.info('In Insert')
                                    exist_result = CobraResults.query.filter_by(task_id=self.task_id, rule_id=rule.id, file=file_path, line=line_number).first()
                                    if exist_result is not None:
                                        log.warning("Exists Result")
                                    else:
                                        log.debug('File: {0}:{1} {2}'.format(file_path, line_number, code_content))
                                        vul = CobraResults(self.task_id, rule.id, file_path, line_number, code_content)
                                        db.session.add(vul)
                                        log.info('Insert Results Success')
                    db.session.commit()
                else:
                    log.info('Not Found')

            except Exception as e:
                log.critical('Error calling grep: ' + str(e))

        # Set End Time For Task
        t = CobraTaskInfo.query.filter_by(id=self.task_id).first()
        t.status = 2
        t.file_count = files['file_nums']
        t.time_end = int(time.time())
        t.time_consume = t.time_end - t.time_start
        t.updated_at = time.strftime('%Y-%m-%d %X', time.localtime())
        try:
            db.session.add(t)
            db.session.commit()
        except Exception as e:
            log.critical("Set start time failed:" + e.message)

        log.info("Scan Done")
Example #57
def hook():
    log.warning("reloading code")

server_reloader.main(run_server, before_reload=hook)

def uncompact_addr(c_addr):
    if c_addr[-2:] == '\0\0':
        log.warning('c_addr: %r > port is ZERO' % c_addr)
        raise AddrError
    return (bin_to_ip(c_addr[:-2]), bin_to_int(c_addr[-2:]))
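# Hedged sketch of the inverse helper exercised by the round-trip test above,
# assuming the compact format implied here (4-byte network-order IP followed
# by a 2-byte big-endian port); the library's actual compact_addr may differ.
import socket
import struct

def compact_addr(addr):
    ip, port = addr
    return socket.inet_aton(ip) + struct.pack('>H', port)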