def load_translation(translation_path):
    """Load all translation files from a folder (or a single file).

    Args:
        translation_path (str): path to a folder of translation xlsx files, or a single file
    Returns:
        category_to_translated (dict[str, list]): translations grouped by category
    """
    file_list = []
    if os.path.isfile(translation_path):
        file_list.append((translation_path, translation_path))
    else:
        for dir_path, dir_names, file_names in os.walk(translation_path):
            for file_name in file_names:
                if file_name.lower().endswith('.xlsx') and not file_name.startswith('~'):
                    file_path = os.path.join(dir_path, file_name)
                    file_list.append((file_name, file_path))

    category_to_translated = {}
    for file_name, file_path in file_list:
        # load from one file
        log.info('load from %s' % file_name)
        category, translated_data = load_from_langxls(file_path, "zh", need_check=True)
        log.info('load %d %ss' % (len(translated_data), category))
        if category in category_to_translated:
            log.warning('warning: override category %s' % category)
        category_to_translated[category] = translated_data
    return category_to_translated
def multi_trim(cls, files=[], callback_list=[]):
    '''Multi-threaded batch trimming of files.

    :param: files(List): list of configs for the files to trim.
        [
            {
                'path': '/Users/nut/Downloads/RS/CCAV.mp4',
                'trim_times': (
                    ("00:50:22", "01:03:27"),
                    ("01:19:39", "01:37:04"),
                    ...
                )
            }...
        ]
    :param: callback_list(List): names of callback functions to run after each trim.
        ['compress', ...]
    '''
    log.warning('thread: %s, parent pid: %s, <Task (%s) start...>' % (
        threading.current_thread().getName(), os.getpid(),
        sys._getframe().f_code.co_name))
    executor = BoundedExecutor(0, 4)
    for file in files:
        suffix_number = 0
        for time in file.get('trim_times'):
            suffix_number += 1
            log.info(sys._getframe().f_code.co_name, 'suffix_number', suffix_number)
            future = executor.submit(cls(file.get('path')).trim, time=time,
                                     suffix_number=suffix_number, lock=executor.lock)
            for callback in callback_list:
                future.add_done_callback(getattr(cls, callback))
            log.info(sys._getframe().f_code.co_name, 'time, suffix_number', time, suffix_number)
def get_head_token_idx(self, start_token_idx, end_token_idx, msg_prefix=''):
    self.check_token_idx(start_token_idx)
    self.check_token_idx(end_token_idx - 1)
    assert start_token_idx < end_token_idx, \
        'start_token_idx:{} >= end_token_idx:{}'.format(
            start_token_idx, end_token_idx)
    head_idx_map = []
    for token_idx in range(start_token_idx, end_token_idx):
        head_trace = [token_idx]
        while start_token_idx <= head_trace[-1] < end_token_idx:
            _, head_idx = self.get_parent(head_trace[-1], msg_prefix)
            # warn if there is a loop in finding one token's head token
            if head_idx in head_trace:
                log.warning(
                    '{}: In sentence #{}, token #{} has loop in its head '
                    'trace list.'.format(msg_prefix, self._sent_idx, token_idx))
                break
            head_trace.append(head_idx)
        head_idx_map.append((token_idx, head_trace[-2]))
    head_idx_list = [head_idx for _, head_idx in head_idx_map]
    # warn if the tokens in the range don't have the same head token
    if min(head_idx_list) != max(head_idx_list):
        log.warning(
            '{}: In sentence #{}, tokens within the range [{}, {}] do not '
            'have the same head token'.format(msg_prefix, self._sent_idx,
                                              start_token_idx, end_token_idx))
    return min(head_idx_list)
def get_gdrive_sheet(spreadsheet_name, sheet_name, retries=3):
    """ Get a google drive spreadsheet

        Args:
            spreadsheet_name:   name of the document
            sheet_name:         name of the sheet inside the document
    """
    init_gdrive()

    msg_error = "ConnectionError ({}) when trying to get '{}/{}'. Details: {}"
    last_error = None

    # Open sheet in a way we can have some retries
    for x in range(retries):
        try:
            # Get the spreadsheet
            spreadsheet = GDRIVE.open(spreadsheet_name)
            return spreadsheet.worksheet(sheet_name)
        except ConnectionError as e:
            last_error = e
            log.warning(msg_error.format(x, spreadsheet_name, sheet_name, e))
            # Sleep to avoid query limitations
            sleep(x * 10)
            # Init gdrive again just in case
            init_gdrive(force=True)

    log.error(msg_error.format("last_attempt", spreadsheet_name, sheet_name, last_error))
    raise ValueError("Too many reading attempts in 'get_gdrive_sheet'")
def init_env(config):
    # Set the debug environment variable CUBLAS_WORKSPACE_CONFIG to ":16:8" (may limit
    # overall performance) or ":4096:8" (increases the library footprint in GPU memory
    # by approximately 24MiB).
    # https://docs.nvidia.com/cuda/cublas/index.html
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"

    if config.seed is not None:
        if config.seed >= 0:
            np.random.seed(config.seed)
            torch.manual_seed(config.seed)
            torch.set_deterministic(True)
            torch.backends.cudnn.benchmark = False
        else:
            log.warning('the random seed should be a non-negative integer')

    config.device = None
    if not config.cpu and torch.cuda.is_available():
        config.device = torch.device('cuda')
    else:
        config.device = torch.device('cpu')

    # https://github.com/pytorch/pytorch/issues/11201
    torch.multiprocessing.set_sharing_strategy('file_system')

    log.info(f'Using device: {config.device}')

    config.run_name = '{}_{}_{}'.format(
        config.data_name,
        Path(config.config).stem if config.config else config.model_name,
        datetime.now().strftime('%Y%m%d%H%M%S'),
    )
    log.info(f'Run name: {config.run_name}')
    return config
def show(title, options, can_exit=True, main_menu=False, decorator='++'):
    """ Display a menu

        options: dictionary in which each key is a string and each value is a
        tuple (string, function), representing the text to display and the
        function that will be called when the related key is entered as input
        ex:
            {'a': ('option a', print)} : show 'option a' and, when 'a' is
            pressed, call the function 'print'
    """
    log.success('{} {} {}'.format(decorator, title, decorator))
    for s, f in options.items():
        log.warning('({}) {}'.format(s, f[0]))
    if can_exit:
        log.warning('(x) Exit')

    wrong_choice = True
    while wrong_choice:
        arg = input()
        print()
        try:
            if arg == 'x' and can_exit:
                wrong_choice = False
                quit_menu(main_menu)
            else:
                funct = options[arg][1]
                wrong_choice = False
                res = funct()
                quit_menu(main_menu)
                return res
        except KeyError as _:
            log.error('Invalid option, retry:')
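# Usage sketch for show() above: a minimal, hypothetical menu. It assumes the
# surrounding log helpers and quit_menu() are available; list_items/add_item
# are made-up handlers, not part of the original module.
def list_items():
    print('listing items...')

def add_item():
    print('adding an item...')

menu_options = {
    'l': ('List items', list_items),
    'a': ('Add item', add_item),
}

# Renders the menu, waits for input, and dispatches to the chosen handler.
show('Main menu', menu_options, can_exit=True, main_menu=True)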
def query_pair(origin, destination, n_days=366):
    """ Query all flights between 2 airports

        Args:
            origin:         code for origin airport
            destination:    code for destination airport
            n_days:         max days of history
    """
    # Start at day 1 since it will only query when day==1
    start_day = date.today()

    dfs = []
    for x in range(n_days):
        query_day = start_day + timedelta(x)

        # Only do first day of month
        if (query_day.day != 1) and (query_day != start_day):
            log.trace(f"Skipping day '{query_day}'")
            continue

        response = query_flights(origin, destination, query_day)
        data = response.json()

        if data["Quotes"]:
            dfs.append(parse_data(data))

    if dfs:
        return pd.concat(dfs).reset_index(drop=True)
    else:
        log.warning(f"No flights from '{origin}' to '{destination}'")
def _make_task(self, task_info):
    if task_info.type in self.types:
        task_class = self.types[task_info.type]
        task_data = self.storage[task_info.id]
        return task_class(task_info, task_data, self.signal)
    else:
        log.warning('Cannot handle task type "{}"', task_info.type)
def set_head_token_idx(self, dep_graph):
    check_type(dep_graph, DependencyGraph)
    if self.head_token_idx != -1:
        log.warning('Overriding existing head_token_idx {}'.format(
            self.head_token_idx))
    self.head_token_idx = dep_graph.get_head_token_idx(
        self.start_token_idx, self.end_token_idx)
def download(addon):
    # TODO: prevent multiple downloads.
    current = addon['current_version']
    for file_obj in current['files']:
        url = file_obj['url']
        filename = file_obj['id']
        directories = get_directories(addon['id'])
        target = '{}.xpi'.format(
            os.path.join(directories['files'], str(file_obj['id'])),
        )
        if os.path.exists(target):
            log.info('{}: Skipping download'.format(addon['id']))
            continue

        res = requests.get(url)
        if res.status_code == 404:
            log.warning('{}: got a 404'.format(addon['id']))
            continue
        else:
            res.raise_for_status()

        with open(target, 'wb') as filehandle:
            for chunk in res.iter_content(10000):
                filehandle.write(chunk)

        log.info('{}: Downloaded file: {}'.format(addon['id'], file_obj['id']))
def inner(*args, **kwargs):
    start_time = time.time()
    ret = func(*args, **kwargs)
    log.warning('thread: %s, parent pid: %s, elapsed: %s, <Task (%s) finished!!!>' % (
        threading.current_thread().getName(), os.getpid(),
        time.time() - start_time, func.__name__))
    return ret
def load_from_list_category(data, lang, need_check=True):
    """Parse translations read from an xlsx in list<text> layout

    Args:
        data: data read from the xlsx; data[i][j] is the cell at row i, column j
        lang (str): "zh"/"en", whether to read the Chinese or the English column
        need_check (bool): whether to run consistency checks
    Returns:
        category (str): category from lang_def
        translated_data (list[str]): list of [file_id, unknown, index, text]
    """
    # check
    if need_check:
        for row in data:
            if row[4] != '' and not check_string_with_origin(row[3], row[4]):
                log.warning('check string failed: %s', str(row[1]))

    # drop extra columns, keep only the internal id and the English/Chinese text
    if lang == 'en':
        data = [(row[1], row[3]) for row in data]
    else:
        data = [(row[1], row[4]) for row in data]

    category = data[0][0].rsplit('-', 3)[0]

    # restore the numbering
    translated_data = []
    for intern_id, text in data:
        if intern_id != '' and text != '':
            # as str, without leading zeros
            file_id, unknown, index = [str(int(x)) for x in intern_id.rsplit('-', 3)[1:]]
            translated_data.append([file_id, unknown, index, text])
    return category, translated_data
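# Illustration of the internal-id handling in load_from_list_category above,
# assuming ids follow the 'category-fileid-unknown-index' layout implied by the
# rsplit('-', 3) calls; the sample id below is made up.
intern_id = 'quest_name-0085-000-0042'

category = intern_id.rsplit('-', 3)[0]
file_id, unknown, index = [str(int(x)) for x in intern_id.rsplit('-', 3)[1:]]

print(category)                 # 'quest_name'
print(file_id, unknown, index)  # '85' '0' '42'  (leading zeros stripped)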
def build_predicates(self):
    assert len(self.all_instances) > 0
    assert self.treebank_reader is not None
    assert self.nombank_reader is not None
    assert self.predicate_mapping is not None
    assert self.corenlp_reader is not None

    if len(self.all_predicates) > 0:
        log.warning('Overriding existing predicates')
        self.all_predicates = []

    log.info('Building predicates')
    for instance in self.all_instances:
        predicate = Predicate.build(instance)
        predicate.set_pred(self.predicate_mapping[str(predicate.pred_pointer)])
        self.all_predicates.append(predicate)

    log.info('Checking explicit arguments with Nombank instances')
    for predicate in self.all_predicates:
        nombank_instance = self.nombank_reader.search_by_pointer(
            predicate.pred_pointer)
        predicate.check_exp_args(nombank_instance, add_missing_args=False,
                                 remove_conflict_imp_args=False, verbose=False)

    log.info('Parsing all implicit and explicit arguments')
    for predicate in self.all_predicates:
        predicate.parse_args(
            self.treebank_reader, self.corenlp_reader,
            include_non_head_entity=self.include_non_head_entity)
    log.info('Done')
async def task_info(chat, cq, match, user_data):
    await cq.answer()
    task_name = match.group(1)
    if task_name not in user_data['tasks']:
        log.warning('Task name \'{}\' not in user tasks {}',
                    task_name, user_data['tasks'].keys())
        log.debug('User data: {}', user_data)
        return await error_message(chat, None, user_data)

    task = user_data['tasks'][task_name]
    info = '''
Task info:

Name: {}
State: {}
URL: {}
Blacklist: {}
Whitelist: {}
    '''.format(task.name,
               'active' if task.state else 'inactive',
               task.args['url'],
               ', '.join(task.args['blacklist']),
               ', '.join(task.args['whitelist']))

    delete_inline = make_inline_keyboard(
        1,
        (Buttons.TASK_DELETE, Constants.TASK_DELETE_TMPL.format(task_name)),
        to_json=False)
    return await chat.edit_text(chat.message['message_id'], info,
                                markup=delete_inline)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--prefix', type=str, default='default')
    parser.add_argument('--model', type=str, default='baseline',
                        choices=['baseline', 'rn', 'film'])  # model architecture
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--learning_rate', type=float, default=1e-4)
    parser.add_argument(
        '--lr_weight_decay', action='store_true', default=False
    )  # if True, learning rate is decayed with exponential scheduling
    parser.add_argument('--checkpoint', type=str, default=None)
    parser.add_argument('--dataset_path', type=str,
                        default='datasets/SortOfCLEVR_4_200000_32')
    parser.add_argument('--image_size', type=int, default=32)
    config = parser.parse_args()

    train_dataset = SortOfCLEVR(config.dataset_path, split='train')
    val_dataset = SortOfCLEVR(config.dataset_path, split='val')

    trainer = Trainer(config, train_dataset, val_dataset)

    log.warning("dataset: %s, learning_rate: %f",
                config.dataset_path, config.learning_rate)
    trainer.train()
def push(self):
    """Push todo to gist.github.com"""
    github = Github()
    gist_id = GistId().get()
    token = GithubToken().get()

    github.login(token)

    if not self.todo.name:
        name = "Todo"
    else:
        name = self.todo.name

    files = {
        name: {
            "content": self.todo_content
        }
    }

    log.info(
        "Pushing '%s' to https://gist.github.com/%s .." % (name, gist_id)
    )
    response = github.edit_gist(gist_id, files=files)

    if response.status_code == 200:
        log.ok("Pushed success.")
    elif response.status_code == 401:
        log.warning("Github token out of date, empty the old token")
        GithubToken().save('')  # empty the token!
        self.push()  # and repush
    else:
        log.error("Pushed failed. %d" % response.status_code)
def upload(self, src, after_copy=None):
    log.info("Mounting the {} folder".format(self._dest))
    try:
        filename = path.basename(src)
        call(['fusermount', '-u', '-z', self._mount_point])
        if not path.isdir(self._mount_point):
            mkdir(self._mount_point)
        cmd = 'sshfs {} {}@{}:{} {} -o IdentityFile={},StrictHostKeyChecking=no'.format(
            "-p " + str(self._port), self._user, self._host, self._dest,
            self._mount_point, self._identity)
        print(cmd)
        c, _, __ = self._run_process(cmd.split(' '))
        if c != 0:
            raise RuntimeError('Can\'t mount remote FS')
        shutil.copy2(src, path.join(self._mount_point, filename))
        if callable(after_copy):
            after_copy(self)
    except Exception as e:
        log.warning(
            'Unable to copy file to remote location. Exception: {}'.format(e))
        raise
    finally:
        self._run_process(['fusermount', '-u', '-z', self._mount_point])
def prepare():
    log.info('prepare content')
    try:
        content.main(action='extract')
        create_sample_players()
    except Exception, e:
        log.warning('can not extract content: %s', e)
def multi_compress(cls, directory='', callback_list=[]):
    '''Multi-threaded batch compression of files.

    :param: directory(String): absolute path of the directory holding the files to compress.
        '/usr/media/'
    :param: callback_list(List): names of callback functions to run after each compression.
        ['func', ...]
    '''
    log.warning('parent pid: %s, thread: %s, <Task (%s) start...>' % (
        os.getpid(), threading.current_thread().getName(),
        sys._getframe().f_code.co_name))
    executor = BoundedExecutor(0, 4)
    directory = directory.strip()
    if os.path.isdir(directory):
        file_path_list = os.listdir(directory)
        log.info(sys._getframe().f_code.co_name, 'file_path_list', file_path_list)
        for file_path in file_path_list:
            future = executor.submit(cls.compress,
                                     file_path=os.path.join(directory, file_path))
            log.info(sys._getframe().f_code.co_name, 'file_path', file_path, future)
            for callback in callback_list:
                future.add_done_callback(getattr(cls, callback))
        executor.shutdown(wait=True)
def main():
    if sys.argv[1] == '1':
        update_translation()
    elif sys.argv[1] == '2':
        merge_translation()
    else:
        log.warning('unknown args')
        sys.exit(-2)
def test_invalid_response_received(self):
    # Response received is invalid
    self.ping_r_in._msg_dict[message.RESPONSE] = 'zz'
    ok_(not self.got_response)
    log.warning(
        "**IGNORE WARNING LOG**")
    self.query.on_response_received(self.ping_r_in)
    ok_(not self.got_response)
def evaluate(self, recommendations, test_urm, at_k=10, single_ap=False, verbose=True):
    """
    Return the MAP@k evaluation for the provided recommendations
    computed with respect to the test_urm

    Parameters
    ----------
    recommendations : list
        List of recommendations, where a recommendation is a list (of length N+1)
        of playlist_id and N items_id:
            [   [7,  18, 11, 76, ...],
                [13, 65, 83, 32, ...],
                [25, 30, 49, 65, ...], ... ]
    test_urm : csr_matrix
        A sparse matrix
    at_k : int, optional
        The number of items to compute the precision at
    single_ap : bool, optional
        If True, return also the array of AP for each user

    Returns
    -------
    MAP@k: (float) MAP for the provided recommendations
    """
    if not at_k > 0:
        log.error('Invalid value of k {}'.format(at_k))
        return

    start = time.time()
    aps = 0.0
    ap_array = []
    for r in recommendations:
        row = test_urm.getrow(r[0]).indices
        m = min(at_k, len(row))

        ap = 0.0
        n_elems_found = 0.0
        for j in range(1, m + 1):
            if r[j] in row:
                n_elems_found += 1
                ap = ap + n_elems_found / j
        if m > 0:
            ap = ap / m
        aps += ap
        if single_ap:
            ap_array.append(ap)

    result = aps / len(recommendations)
    print('MAP computed in {:.2f} s'.format(time.time() - start))
    if verbose:
        log.warning('MAP: {}'.format(result))
    if single_ap:
        return result, ap_array
    else:
        return result
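# A tiny worked example of the AP@k arithmetic used in evaluate() above,
# independent of the class; the recommendation and relevant-item ids are made up.
recommendation = [7, 18, 11, 76, 40, 93]   # playlist 7 followed by five recommended items
relevant = {11, 93, 55}                    # items actually in the test set for playlist 7

at_k = 5
m = min(at_k, len(relevant))               # 3

hits = 0
ap = 0.0
for j, item in enumerate(recommendation[1:at_k + 1], start=1):
    if item in relevant:
        hits += 1
        ap += hits / j                     # precision at each hit position
ap /= m

print(round(ap, 4))                        # hits at ranks 2 and 5 -> (1/2 + 2/5) / 3 = 0.3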
async def recv(self):
    async for data in self.socket:
        msg = Message.load(data)
        if msg is not None:
            # log.debug(f'Received: {msg} from {self}')
            await self.dispatcher.dispatch(msg, self)
        else:
            log.warning(f'Bad message:\n{data}')
            self.rank -= 1
def run(self, target=None, tid=None, pid=None):
    if target is None:
        log.critical("Please set --target param")
        sys.exit()
    if tid is not None:
        task_id = tid
        # Start Time For Task
        t = CobraTaskInfo.query.filter_by(id=tid).first()
        if t is None:
            log.critical("Task id doesn't exist.")
            sys.exit()
        if t.status not in [0, 1]:
            log.critical("Task already scanned.")
            sys.exit()
        t.status = 1
        t.time_start = int(time.time())
        t.updated_at = time.strftime('%Y-%m-%d %X', time.localtime())
        try:
            db.session.add(t)
            db.session.commit()
        except Exception as e:
            log.error("Set start time failed" + str(e.message))
    else:
        task_id = None

    target_type = self.parse_target(target)
    if target_type is False:
        log.error("""
            Git Repository: must .git end
            SVN Repository: can http:// or https://
            Directory: must be local directory
            File: must be single file or tar.gz/zip/rar compress file
            """)
    from engine import static
    s = static.Static(target, task_id=task_id, project_id=pid)
    if target_type == 'directory':
        s.analyse()
    elif target_type == 'compress':
        from utils.decompress import Decompress
        # load a compressed file. only tar.gz, rar, zip supported.
        dc = Decompress(target)
        # decompress it. A directory named like "222_test.tar" will be created.
        dc.decompress()
        s.analyse()
    elif target_type == 'file':
        s.analyse()
    elif target_type == 'git':
        from pickup.GitTools import Git
        g = Git(target, branch='master')
        g.get_repo()
        if g.clone() is True:
            s.analyse()
        else:
            log.critical("Git clone failed")
    elif target_type == 'svn':
        log.warning("Not Support SVN Repository")
def main():
    if sys.argv[1] == '1':
        gen_chs()
    elif sys.argv[1] == '2':
        gen_cht()
    elif sys.argv[1] == '3':
        gen_chs_force()
    else:
        log.warning('unknown args')
        sys.exit(-2)
async def parse(self, url):
    host = detect_host(url)
    handler = getattr(self, 'parse_' + host, None)
    if not handler:
        log.warning('Unsupported URL ({}): {}', host, url)
        return

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as page:
            document = html.fromstring(await page.read())
            return handler(document)
def rep_mention(self, rep_mention):
    check_type(rep_mention, Mention)
    if self._rep_mention is not None:
        if self._rep_mention.has_same_span(rep_mention):
            return
        else:
            log.warning('Overriding existing rep_mention ({})'.format(
                self._rep_mention))
            self._rep_mention.rep = False
    self._rep_mention = rep_mention
def get_parent(self, token_idx, msg_prefix=''):
    parent = self.lookup('mod', token_idx, include_extra=False)
    if len(parent) == 0:
        return 'root', -1
    if len(parent) > 1 or len(parent.items()[0][1]) > 1:
        log.warning(
            '{}: In sentence #{}, token #{} has more than 1 non-extra '
            'head token: {}'.format(msg_prefix, self._sent_idx, token_idx,
                                    parent))
    return parent.items()[0][0], parent.items()[0][1][0]
def main():
    lang = 'en'

    # getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'l:')
    except getopt.GetoptError as e:
        log.error(e)
        sys.exit(2)
    for o, a in opts:
        if o == '-l':
            lang = a

    cd = sys.path[0]
    translation_path = os.path.join(cd, '../translation/lang')
    dest_path = translation_path

    # load translation
    # keyed by ID; each key maps to a list whose members are lines from the original file
    lines_grouped_by_id = {}
    translate_file = os.path.join(translation_path, '%s.lang.csv' % lang)
    with open(translate_file, 'rt', encoding='utf-8') as fp:
        fp.readline()
        lines = fp.readlines()

    # split
    for line in lines:
        _id = line.split(',', 1)[0]
        _id = _id[1:-1]    # remove "
        if _id not in lines_grouped_by_id.keys():
            lines_grouped_by_id[_id] = []
        lines_grouped_by_id[_id].append(line)

    for _id, lines_with_same_id in sorted(lines_grouped_by_id.items()):
        target_file = os.path.join(dest_path, '%s.%s.lang.csv' % (lang, _id))
        log.debug('split to file %s' % target_file)
        with open(target_file, 'wt', encoding='utf-8') as fp:
            fp.writelines(lines_with_same_id)

    # known id
    known_id = set()
    for values in (file_id_of_pair.values(), file_id_of_list.values(),
                   file_id_of_array.values()):
        for id_tuple in values:
            for _id in id_tuple:
                known_id.add(_id)
    known_id = known_id | ignored_file_id

    # file list
    target_file = os.path.join(dest_path, '%s.lang.split.txt' % lang)
    with open(target_file, 'wt', encoding='utf-8') as fp:
        id_list = sorted([int(_id) for _id in lines_grouped_by_id.keys()])
        for _id in id_list:
            fp.write('%d\n' % _id)
            if str(_id) not in known_id:
                log.warning('warning: unknown id %d.' % _id)
def map_keys(self, f, merge, warn=False):
    y = {}
    for k, v in self.items.items():
        z = f(k)
        if z in y:
            if warn:
                warning("Duplicate key: {}".format(z))
            y[z] = merge(y[z], v)  # merge these 2 values
        else:
            y[z] = v
    return DictionaryCollection(self, y)
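# Standalone sketch of the key-mapping/merge behaviour of map_keys() above,
# using a plain dict instead of DictionaryCollection; the data is made up.
items = {'Apple': 3, 'apple': 2, 'Pear': 1}

f = str.lower                   # map keys to lowercase
merge = lambda a, b: a + b      # sum values whose mapped keys collide

y = {}
for k, v in items.items():
    z = f(k)
    y[z] = merge(y[z], v) if z in y else v

print(y)  # {'apple': 5, 'pear': 1}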
def get_face(self, filename, landmarks=None, size=(cfg.CROP_SIZE, cfg.CROP_SIZE),
             use_cache=True, from_sequence=False):
    # landmarks = np.zeros((68, 2), dtype=np.float32)
    # pose = np.zeros(3, dtype=np.float32)

    crop_filepath = os.path.join(self.cropped_img_dir, filename + '.jpg')
    if use_cache and os.path.isfile(crop_filepath):
        try:
            crop = io.imread(crop_filepath)
        except OSError:
            os.remove(crop_filepath)
            return self.get_face(filename, landmarks, size, use_cache, from_sequence)
        if crop.shape[:2] != size:
            crop = cv2.resize(crop, size, interpolation=cv2.INTER_CUBIC)
        if landmarks is None:
            of_conf, landmarks, _ = ds_utils.read_openface_detection(
                os.path.join(self.feature_dir, filename),
                numpy_lmFilepath=os.path.join(self.npfeature_dir, filename))
        landmarks = face_processing.scale_landmarks_to_crop(
            landmarks, output_size=size)
    else:
        # Load image from dataset
        img_path = os.path.join(self.fullsize_img_dir, filename + '.jpg')
        img = io.imread(img_path)
        if img is None:
            raise IOError(
                "\tError: Could not load image {}!".format(img_path))

        # load landmarks extracted with OpenFace2
        if landmarks is None:
            of_conf, landmarks, _ = ds_utils.read_openface_detection(
                os.path.join(self.feature_dir, filename),
                numpy_lmFilepath=os.path.join(self.npfeature_dir, filename),
                from_sequence=from_sequence)
            if of_conf <= 0.0:
                log.warning("No landmarks for image {}".format(filename))

        # crop, landmarks = face_processing.crop_bump(img, landmarks, output_size=size)
        crop, landmarks = face_processing.crop_celebHQ(img, landmarks, output_size=size)

        if use_cache:
            utils.io.makedirs(crop_filepath)
            io.imsave(crop_filepath, crop)

    return crop, landmarks
def run(self, num_factors, urm_train=None, urm=None, urm_test=None, targetids=None,
        with_scores=False, export=True, verbose=True):
    """
    Run the model and export the results to a file

    Parameters
    ----------
    num_factors : int, number of latent factors
    urm : csr matrix, URM. If None, used: data.get_urm_train(). This should be the
        entire URM for which the targetids corresponds to the row indexes.
    urm_test : csr matrix, urm where to test the model. If None, use: data.get_urm_test()
    targetids : list, target user ids. If None, use: data.get_target_playlists()

    Returns
    -------
    recs: (list) recommendations
    map10: (float) MAP10 for the provided recommendations
    """
    _urm = data.get_urm_train()
    _icm = data.get_icm()
    _urm_test = data.get_urm_test()
    _targetids = data.get_target_playlists()
    # _targetids = data.get_all_playlists()

    start = time.time()

    urm_train = _urm if urm_train is None else urm_train
    # urm = _urm if urm is None else urm
    urm_test = _urm_test if urm_test is None else urm_test
    targetids = _targetids if targetids is None else targetids

    self.fit(urm_train=urm_train, num_factors=num_factors)
    recs = self.recommend_batch(userids=targetids)

    map10 = None
    if len(recs) > 0:
        map10 = self.evaluate(recs, test_urm=urm_test, verbose=verbose)
    else:
        log.warning('No recommendations available, skip evaluation')

    if export:
        exportcsv(recs, path='submission', name=self.name, verbose=verbose)

    if verbose:
        log.info('Run in: {:.2f}s'.format(time.time() - start))

    return recs, map10
def stop_server(_=None):
    try:
        with open('/tmp/infnote_chain.pid', 'r') as file:
            pid = int(file.readline())
            os.kill(pid, signal.SIGTERM)
            log.info(f'Killed server by PID {pid}')
    except FileNotFoundError:
        log.warning('PID file does not exist. The process may not have started up correctly.')
    except ProcessLookupError:
        log.warning(f'No such process PID {pid}.')
def do_verify(self, trans_receipt, timeout=10):
    url = INAPP_PURCHASE_VERIFY_URL
    resp = self._send_verify(url, trans_receipt, timeout)
    if 21007 == resp.get("status"):
        # player used a sandbox account
        log_data = {"player_id": self.player_id, "resp": resp, "receipt": trans_receipt}
        log.warning("IAP - Player used a sandbox account: %s" % str(log_data))
        resp = self._send_verify(INAPP_PURCHASE_SANDBOX_VERIFY_URL, trans_receipt, timeout)
    return resp
def get_with_retry(self, key):
    """ Like GConnect.get() but retries on temporary errors.
    """
    while True:
        try:
            val = self.get(key)
            return val
        except TemporaryFailError, e:
            log.warning("TemporaryFailError: can't get key '%s' - "
                        "retry in 2 secs...", key)
            time.sleep(2)
            continue
def _uncompact_nodes2(c_nodes):
    nodes = []
    for c_node in c_nodes:
        node_id = Id(c_node[:ID_SIZE_BYTES])
        try:
            node_addr = uncompact_addr(c_node[ID_SIZE_BYTES:])
        except (AddrError):
            log.warning('IPv6 addr in nodes2: %s' % c_node)
        else:
            node = Node(node_addr, node_id)
            nodes.append(node)
    return nodes
def test_block_flood(self):
    from floodbarrier import MAX_PACKETS_PER_PERIOD as FLOOD_LIMIT
    for _ in xrange(FLOOD_LIMIT):
        self.client_r.sendto(DATA, tc.SERVER_ADDR)
    for _ in xrange(10):
        self.client_r.sendto(DATA, tc.SERVER_ADDR)
        log.warning(
            "TESTING LOGS ** IGNORE EXPECTED WARNING **")
    time.sleep(tc.TASK_INTERVAL)
    with self.lock:
        log.debug('datagram processed: %d/%d' % (
            len(self.datagrams_received), FLOOD_LIMIT))
        assert len(self.datagrams_received) <= FLOOD_LIMIT
def test_fire_callback_on_late_response(self):
    self.query.timeout_task.fire_callbacks()
    self.query.timeout_task.cancel()
    # the server creates the response
    pong_msg = message.OutgoingPingResponse(tc.SERVER_ID)
    pong_data = pong_msg.encode(tc.TID)
    # rpc_m decodes the response received
    pong_msg = message.IncomingMsg(pong_data)
    # querier notifies of the message (but it's too late)
    self.query.on_response_received(pong_msg)
    log.warning(
        "**IGNORE WARNING LOG**")
    assert not self.got_response and not self.got_error \
        and self.got_timeout
def apply_one_translate(self, name, origin, translation, need_check=True):
    """Apply a single translation.

    Checks first that the original text matches; does not check for updates.

    Args:
        name (str): name, starting with SI_
        origin (str): original text
        translation (str): translated text
        need_check (bool): whether to run consistency checks
    """
    if name in self.ui_lines.keys() and origin == self.ui_lines[name].origin:
        if need_check and not check_string_with_origin(translation, origin):
            log.warning('check string failed: %s', name)
        self.ui_lines[name].set_translation(translation)
def config_pytorch(options):
    """Config pytorch packages.

    Fix random number for packages and initialize distributed environment for pytorch.
    Setup cuda environment for pytorch.

    :param options: A global object containing specified options.
    :type options: argparse.Namespace
    """
    # Setting `cudnn.deterministic = True` will turn on
    # CUDNN deterministic setting which can slow down training considerably.
    # Unexpected behavior may also be observed from checkpoint.
    # See: https://github.com/pytorch/examples/blob/master/imagenet/main.py
    if options.cudnn_deterministic:
        cudnn.deterministic = True
        log.warning('You have chosen to seed training. '
                    'This will turn on the CUDNN deterministic setting, '
                    'which can slow down your training considerably! '
                    'You may see unexpected behavior when restarting '
                    'from checkpoints.', 0)

    if options.seed is not None:
        random.seed(options.seed)
        torch.manual_seed(options.seed)

    # define the graph for the computation.
    if options.use_cuda:
        assert torch.cuda.is_available()

    options.rank = dist.get_rank()
    options.world_size = dist.get_world_size()
    options.graph = FCGraph(options)

    # enable cudnn accelerator if we are using cuda.
    if options.use_cuda:
        options.graph.assigned_gpu_id()
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True

        if torch.backends.cudnn.version() is None:
            log.warning("CUDNN not found on device.")

    log.info("World size={}, Rank={}, hostname={}, cuda_available={}, cuda_device={}".format(
        options.world_size, options.rank, socket.gethostname(),
        torch.cuda.is_available(), torch.cuda.current_device()))
def on_response_received(self, response_msg):
    try:
        response_msg.sanitize_response(self.query)
    except (message.MsgError):
        log.exception(
            "We don't like dirty responses: %r\nresponse ignored" % response_msg)
        return  # Response ignored
    self.node.is_ns = response_msg.ns_node

    if self.node.id:
        if response_msg.sender_id != self.node.id:
            return  # Ignore response
    else:
        self.node.id = response_msg.sender_id

    # TODO2: think whether late responses should be accepted
    if self.timeout_task.cancelled:
        log.warning(
            "Response received but it's too late!!\n%r, %r" % (
                response_msg, self.timeout_task))
        return  # Ignore response
    self.timeout_task.cancel()

    nodes = []
    try:
        nodes.extend(response_msg.nodes)
    except (AttributeError):
        pass
    try:
        nodes.extend(response_msg.nodes2)
    except (AttributeError):
        pass
    # Notify routing manager (if nodes found).
    # Do not notify when the query was a GET_PEERS because
    # the lookup is in progress and the routing_m shouldn't
    # generate extra traffic.
    if self.query == message.FIND_NODE and \
            nodes and self.notify_routing_m_on_nodes_found_f:
        self.notify_routing_m_on_nodes_found_f(nodes)
    # Notify routing manager (response)
    self.node.is_ns = response_msg.ns_node
    if self.notify_routing_m_on_response_f:
        self.notify_routing_m_on_response_f(self.node)

    # Do callback to whomever did the query
    self.on_response_f(response_msg, self.node)
    return True  # the response was fine
def load_from_pair_category(data, lang, need_check=True):
    """Parse translations read from an xlsx in <name, desc> layout

    Args:
        data: data read from the xlsx; data[i][j] is the cell at row i, column j
        lang (str): "zh"/"en", whether to read the Chinese or the English columns
        need_check (bool): whether to run consistency checks
    Returns:
        category (str): category from lang_def
        translated_data (list[str]): list of [file_id, unknown, index, text]
    """
    # check
    if need_check:
        for row in data:
            if (row[4] != '' and not check_string_with_origin(row[3], row[4])) \
                    or (row[7] != '' and not check_string_with_origin(row[6], row[7])):
                log.warning('check string failed: %s', str(row[1]))

    # drop extra columns, keep only the internal id, the English/Chinese name
    # and the English/Chinese description
    if lang == 'en':
        data = [(row[1], row[3], row[6]) for row in data]
    else:
        data = [(row[1], row[4], row[7]) for row in data]

    category = data[0][0].rsplit('-', 1)[0]
    name_file_id, desc_file_id = file_id_of_pair[category]

    # restore the numbering
    translated_data = []
    for intern_id, name, desc in data:
        if intern_id == '':
            continue    # empty row
        index = intern_id.rsplit('-', 1)[-1]
        index = str(int(index))    # strip leading zeros
        # set unknown to 0 directly; no exceptions to this have been found so far
        unknown = '0'
        if name != '':
            translated_data.append([name_file_id, unknown, index, name])
        if desc != '':
            translated_data.append([desc_file_id, unknown, index, desc])
    return category, translated_data
def init_cb():
    global cb
    bucket = settings.COUCHBASE_BUCKET
    host = settings.COUCHBASE_HOST
    log.debug("connecting to bucket '%s' on host '%s'", bucket, host)
    if bucket and host:
        while True:
            try:
                cb = GConnectionExtention(bucket=bucket, host=host, quiet=True)
                log.debug("connected")
                break
            except NetworkError, e:
                log.warning("NetworkError: can't connect to host '%s' - "
                            "retry in 5 secs...", host)
            except BucketNotFoundError, e:
                log.warning("BucketNotFoundError: can't find bucket '%s' "
                            "on host '%s' - retry in 5 secs...", bucket, host)
            time.sleep(5)
            continue
def test_listen_upd(self):
    r = ThreadedReactor()
    r.start()
    log.warning(''.join(
        ('TESTING LOGS ** IGNORE EXPECTED WARNING ** ',
         '(udp_listen has not been called)')))
    self.client_r.sendto(DATA, tc.SERVER_ADDR)
    while 1:  # waiting for data
        with self.lock:
            if self.datagrams_received:
                break
        time.sleep(tc.TASK_INTERVAL)
    with self.lock:
        first_datagram = self.datagrams_received.pop(0)
        log.debug('first_datagram: %s, %s' % (
            first_datagram, (DATA, tc.CLIENT_ADDR)))
    assert first_datagram, (DATA, tc.CLIENT_ADDR)
    r.stop()
def run(self):
    """Main loop activated by calling self.start()"""
    last_task_run = time.time()
    stop_flag = self.stop_flag
    while not stop_flag:
        timeout_raised = False
        try:
            data, addr = self.s.recvfrom(BUFFER_SIZE)
        except (AttributeError):
            log.warning('udp_listen has not been called')
            time.sleep(self.task_interval)
            # TODO2: try using Event and wait
            timeout_raised = True
        except (socket.timeout):
            timeout_raised = True
        except (socket.error), e:
            log.critical(
                'Got socket.error when receiving (more info follows)')
            log.exception('See critical log above')
        else:
            ip_is_blocked = self.floodbarrier_active and \
                self.floodbarrier.ip_blocked(addr[0])
            if ip_is_blocked:
                log.warning('%s blocked' % `addr`)
            else:
                self.datagram_received_f(data, addr)

        if timeout_raised or \
                time.time() - last_task_run > self.task_interval:
            # with self._lock:
            self._lock.acquire()
            try:
                while True:
                    task = self.tasks.consume_task()
                    if task is None:
                        break
                    task.fire_callbacks()
                stop_flag = self.stop_flag
            finally:
                self._lock.release()
def on_response_received(self, response_msg, addr):
    # TYPE and TID already sanitized by rpc_manager
    log.debug('response received: %s' % repr(response_msg))
    try:
        addr_query_list = self.pending[addr]
    except (KeyError):
        log.warning('No pending queries for %s', addr)
        return  # Ignore response
    # There are pending queries from this node (let's find the right one by TID)
    query_found = False
    for query_index, query in enumerate(addr_query_list):
        log.debug('response node: %s, query:\n(%s, %s)' % (
            `addr`, `query.tid`, `query.query`))
        if query.matching_tid(response_msg.tid):
            query_found = True
            break
    if not query_found:
        log.warning('No query for this response\n%s\nsource: %s' % (
            response_msg, addr))
        return  # ignore response
    # This response matches query. Trigger query's callback
    response_is_ok = query.on_response_received(response_msg)
    if response_is_ok:
        # Remove this query from pending
        if len(addr_query_list) == 1:
            # There is one item in the list. Remove the whole list.
            del self.pending[addr]
        else:
            del addr_query_list[query_index]
    else:
        log.warning('Bad response from %r\n%r' % (addr, response_msg))
def get_csv_from_xls(translation_path, lang):
    """Load all translation files from a folder (or a single file).

    Args:
        translation_path (str): path to a folder of translation xlsx files, or a single file
        lang (str): "zh"/"en", whether to read the Chinese or the English text
    Returns:
        csv_list (list[str]): lines in lang.csv format, grouped by category
        ui_xls_file: the UI translation file, if one was found
    """
    file_list = []
    if os.path.isfile(translation_path):
        file_list.append((translation_path, translation_path))
    else:
        for dir_path, dir_names, file_names in os.walk(translation_path):
            for file_name in file_names:
                if file_name.lower().endswith('.xlsx') and not file_name.startswith('~') \
                        and (file_name.startswith('en.') or lang == 'zh'):
                    file_path = os.path.join(dir_path, file_name)
                    file_list.append((file_name, file_path))

    category_to_translated = {}
    ui_xls_file = None
    for file_name, file_path in file_list:
        # load from one file
        log.info('load from %s' % file_name)
        category, translated_data = load_from_langxls(file_path, lang,
                                                      need_check=False, load_ui=True)
        log.info('load %d %ss' % (len(translated_data), category))
        if category in category_to_translated:
            log.warning('warning: override category %s' % category)
        category_to_translated[category] = translated_data
        if category == 'UI':
            ui_xls_file = file_path

    list_list = [line for _, translated_data in sorted(category_to_translated.items())
                 for line in translated_data]
    csv_list = ['"%s","%s","%s","0","%s"\n' % (line[0], line[1], line[2], line[3])
                for line in list_list]
    return csv_list, ui_xls_file
def load_from_ui_fake(data, lang, need_check=True):
    """Parse translations read from an xlsx in UI layout

    Args:
        data: data read from the xlsx; data[i][j] is the cell at row i, column j
        lang (str): "zh"/"en", whether to read the Chinese or the English column
        need_check (bool): whether to run consistency checks
    Returns:
        category (str): "UI"
        translated_data (list[str]): list of [file_id, unknown, index, text]
    """
    # check
    if need_check:
        for row in data:
            if (row[2] != '' and not check_string_with_origin(row[3], row[4])) \
                    or (row[3] != '' and not check_string_with_origin(row[6], row[7])):
                log.warning('check string failed: %s', str(row[1]))

    # drop extra columns, keep only the name and the original/translated text
    if lang == 'en':
        data = [(row[1], row[2]) for row in data]
    else:
        data = [(row[1], row[3]) for row in data]

    category = 'UI'

    # restore the numbering
    translated_data = []
    for intern_id, text in data:
        if intern_id == '':
            continue    # empty row
        # file_id=='UI', unknown=='0', index==intern_id
        index = intern_id
        unknown = '0'
        translated_data.append([category, unknown, index, text])
    return category, translated_data
def test_tools(self):
    bin_strs = ["23", "\1\5", "a\3"]
    for bs in bin_strs:
        i = bin_to_int(bs)
        bs2 = int_to_bin(i)
        log.debug("bs: %s, bin_to_int(bs): %d, bs2: %s" % (bs, i, bs2))
        assert bs == bs2

    ips = ["127.0.0.1", "222.222.222.222", "1.2.3.4"]
    ports = [12345, 99, 54321]
    for addr in zip(ips, ports):
        c_addr = compact_addr(addr)
        addr2 = uncompact_addr(c_addr)
        assert addr == addr2

    c_peers = message._compact_peers(tc.PEERS)
    peers = message._uncompact_peers(c_peers)
    for p1, p2 in zip(tc.PEERS, peers):
        assert p1[0] == p2[0]
        assert p1[1] == p2[1]

    c_nodes = message._compact_nodes(tc.NODES)
    nodes = message._uncompact_nodes(c_nodes)
    for n1, n2 in zip(tc.NODES, nodes):
        assert n1 == n2

    bin_ipv6s = ["\x00" * 10 + "\xff\xff" + "\1\2\3\4",
                 "\x22" * 16]
    assert bin_to_ip(bin_ipv6s[0]) == "1.2.3.4"
    assert_raises(AddrError, bin_to_ip, bin_ipv6s[1])

    PORT = 7777
    BIN_PORT = int_to_bin(PORT)
    c_nodes2 = [tc.CLIENT_ID.bin_id + ip + BIN_PORT for ip in bin_ipv6s]
    nodes2 = [node.Node(("1.2.3.4", PORT), tc.CLIENT_ID)]
    log.debug(message._uncompact_nodes2(c_nodes2))
    assert message._uncompact_nodes2(c_nodes2) == nodes2
    log.warning(
        "**IGNORE WARNING LOG** This exception was raised by a test")
def sanitize_response(self, query):
    self._sanitize_common()
    # sender_id
    self.sender_id = self._get_id(RESPONSE, ID)

    if query in [FIND_NODE, GET_PEERS]:
        # nodes
        nodes_found = False
        c_nodes = self._get_str(RESPONSE, NODES, optional=True)
        if c_nodes:
            self.nodes = _uncompact_nodes(c_nodes)
            nodes_found = True
        # nodes2
        try:
            self.nodes2 = _uncompact_nodes2(
                self._msg_dict[RESPONSE][NODES2])
            if nodes_found:
                log.info('Both nodes and nodes2 found')
            nodes_found = True
        except (KeyError):
            pass

        if query == FIND_NODE:
            if not nodes_found:
                log.warning('No nodes in find_node response')
                raise MsgError, 'No nodes in find_node response'
        elif query == GET_PEERS:
            # peers
            try:
                self.peers = _uncompact_peers(
                    self._msg_dict[RESPONSE][VALUES])
                if nodes_found:
                    log.warning(
                        'Nodes and peers found in get_peers response')
            except (KeyError):
                if not nodes_found:
                    log.warning(
                        'No nodes or peers found in get_peers response')
                    raise MsgError, 'No nodes or peers found in get_peers response'
            # token
            self.token = self._get_str(RESPONSE, TOKEN)
def check_xls(src_path, column_id, origin_column_id):
    """Check an xlsx file.

    Args:
        src_path (str): path to the xlsx file to check
        column_id (int): id of the translated column
        origin_column_id (int): id of the original-text column
    """
    data = load_xls(src_path)
    for line in data:
        text_is_ok = False
        try:
            text_to_check = line[column_id]
            # skip empty line
            if text_to_check == '':
                continue
            text_is_ok = check_string(text_to_check)
            if origin_column_id is not None:
                text_is_ok &= check_string_with_origin(line[column_id],
                                                       line[origin_column_id])
        except Exception as e:
            log.warning(line)
            log.warning(e)
        if not text_is_ok:
            log.warning('Failed when checking:\n%s\n' % ', '.join(line))
def analyse(self):
    if self.directory is None:
        log.critical("Please set directory")
        sys.exit()
    log.info('Start code static analyse...')

    d = directory.Directory(self.directory)
    files = d.collect_files(self.task_id)
    log.info('Scan Files: {0}, Total Time: {1}s'.format(files['file_nums'], files['collect_time']))

    ext_language = {
        # Image
        '.jpg': 'image',
        '.png': 'image',
        '.bmp': 'image',
        '.gif': 'image',
        '.ico': 'image',
        '.cur': 'image',
        # Font
        '.eot': 'font',
        '.otf': 'font',
        '.svg': 'font',
        '.ttf': 'font',
        '.woff': 'font',
        # CSS
        '.css': 'css',
        '.less': 'css',
        '.scss': 'css',
        '.styl': 'css',
        # Media
        '.mp3': 'media',
        '.swf': 'media',
        # Execute
        '.exe': 'execute',
        '.sh': 'execute',
        '.dll': 'execute',
        '.so': 'execute',
        '.bat': 'execute',
        '.pl': 'execute',
        # Edit
        '.swp': 'tmp',
        # Cert
        '.crt': 'cert',
        # Text
        '.txt': 'text',
        '.csv': 'text',
        '.md': 'markdown',
        # Backup
        '.zip': 'backup',
        '.bak': 'backup',
        '.tar': 'backup',
        '.rar': 'backup',
        '.tar.gz': 'backup',
        '.db': 'backup',
        # Config
        '.xml': 'config',
        '.yml': 'config',
        '.spf': 'config',
        '.iml': 'config',
        '.manifest': 'config',
        # Source
        '.psd': 'source',
        '.as': 'source',
        # Log
        '.log': 'log',
        # Template
        '.template': 'template',
        '.tpl': 'template',
    }
    for ext in files:
        if ext in ext_language:
            log.info('{0} - {1}'.format(ext, files[ext]))
            continue
        else:
            log.info(ext)

    languages = CobraLanguages.query.all()
    rules = CobraRules.query.filter_by(status=1).all()
    extensions = None

    # `grep` (`ggrep` on Mac)
    grep = '/bin/grep'
    # `find` (`gfind` on Mac)
    find = '/bin/find'
    if 'darwin' == sys.platform:
        ggrep = ''
        gfind = ''
        for root, dir_names, file_names in os.walk('/usr/local/Cellar/grep'):
            for filename in file_names:
                if 'ggrep' == filename:
                    ggrep = os.path.join(root, filename)
        for root, dir_names, file_names in os.walk('/usr/local/Cellar/findutils'):
            for filename in file_names:
                if 'gfind' == filename:
                    gfind = os.path.join(root, filename)
        if ggrep == '':
            log.critical("brew install ggrep please!")
            sys.exit(0)
        else:
            grep = ggrep
        if gfind == '':
            log.critical("brew install findutils please!")
            sys.exit(0)
        else:
            find = gfind

    for rule in rules:
        log.info('Scan rule id: {0} {1} {2}'.format(self.project_id, rule.id, rule.description))
        # Filters
        for language in languages:
            if language.id == rule.language:
                extensions = language.extensions.split('|')
        if extensions is None:
            log.critical("Rule Language Error")
            sys.exit(0)

        # White list
        white_list = []
        ws = CobraWhiteList.query.filter_by(project_id=self.project_id, rule_id=rule.id, status=1).all()
        if ws is not None:
            for w in ws:
                white_list.append(w.path)

        try:
            if rule.regex.strip() == "":
                filters = []
                for index, e in enumerate(extensions):
                    if index > 1:
                        filters.append('-o')
                    filters.append('-name')
                    filters.append('*' + e)
                # Find Special Ext Files
                param = [find, self.directory, "-type", "f"] + filters
            else:
                filters = []
                for e in extensions:
                    filters.append('--include=*' + e)
                # Exclude VCS directories
                filters.append('--exclude-dir=.svn')
                filters.append('--exclude-dir=.cvs')
                filters.append('--exclude-dir=.hg')
                filters.append('--exclude-dir=.git')
                filters.append('--exclude-dir=.bzr')
                filters.append('--exclude=*.svn-base')
                # -n Show Line number / -r Recursive / -P Perl regular expression
                param = [grep, "-n", "-r", "-P"] + filters + [rule.regex, self.directory]

            # log.info(' '.join(param))
            p = subprocess.Popen(param, stdout=subprocess.PIPE)
            result = p.communicate()

            # Exists result
            if len(result[0]):
                lines = str(result[0]).split("\n")
                for line in lines:
                    line = line.strip()
                    if line == '':
                        continue
                    if rule.regex.strip() == '':
                        # Find
                        file_path = line.strip().replace(self.directory, '')
                        log.debug('File: {0}'.format(file_path))
                        vul = CobraResults(self.task_id, rule.id, file_path, 0, '')
                        db.session.add(vul)
                    else:
                        # Grep
                        line_split = line.replace(self.directory, '').split(':', 1)
                        file_path = line_split[0].strip()
                        code_content = line_split[1].split(':', 1)[1].strip()
                        line_number = line_split[1].split(':', 1)[0].strip()
                        if file_path in white_list or ".min.js" in file_path:
                            log.info("In white list or min.js")
                        else:
                            # Annotation
                            # # // /* *
                            match_result = re.match("(#)?(//)?(\*)?(/\*)?", code_content)
                            if match_result.group(0) is not None and match_result.group(0) != "":
                                log.info("In Annotation")
                            else:
                                log.info('In Insert')
                                exist_result = CobraResults.query.filter_by(
                                    task_id=self.task_id, rule_id=rule.id,
                                    file=file_path, line=line_number).first()
                                if exist_result is not None:
                                    log.warning("Exists Result")
                                else:
                                    log.debug('File: {0}:{1} {2}'.format(file_path, line_number, code_content))
                                    vul = CobraResults(self.task_id, rule.id, file_path, line_number, code_content)
                                    db.session.add(vul)
                                    log.info('Insert Results Success')
                db.session.commit()
            else:
                log.info('Not Found')
        except Exception as e:
            log.critical('Error calling grep: ' + str(e))

    # Set End Time For Task
    t = CobraTaskInfo.query.filter_by(id=self.task_id).first()
    t.status = 2
    t.file_count = files['file_nums']
    t.time_end = int(time.time())
    t.time_consume = t.time_end - t.time_start
    t.updated_at = time.strftime('%Y-%m-%d %X', time.localtime())
    try:
        db.session.add(t)
        db.session.commit()
    except Exception as e:
        log.critical("Set end time failed:" + e.message)
    log.info("Scan Done")
def hook():
    log.warning("reloading code")

server_reloader.main(run_server, before_reload=hook)
def uncompact_addr(c_addr):
    if c_addr[-2:] == '\0\0':
        log.warning('c_addr: %r > port is ZERO' % c_addr)
        raise AddrError
    return (bin_to_ip(c_addr[:-2]), bin_to_int(c_addr[-2:]))
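# For context on the compact address encoding exercised by uncompact_addr and
# test_tools above: a standalone Python 3 sketch of the usual 6-byte format
# (4 network-order IP bytes followed by a 2-byte big-endian port). The helper
# names below are local to this example, not the project's own.
import socket
import struct

def compact_addr_example(addr):
    """Pack ('1.2.3.4', 7777) into the 6-byte compact format."""
    ip, port = addr
    return socket.inet_aton(ip) + struct.pack('!H', port)

def uncompact_addr_example(c_addr):
    """Unpack 6 compact bytes back into (ip, port), rejecting port 0."""
    ip = socket.inet_ntoa(c_addr[:4])
    port = struct.unpack('!H', c_addr[4:6])[0]
    if port == 0:
        raise ValueError('port is ZERO')
    return ip, port

packed = compact_addr_example(('1.2.3.4', 7777))
print(packed.hex())                     # '010203041e61'
print(uncompact_addr_example(packed))   # ('1.2.3.4', 7777)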