def get_feeditems(url, min_timestamp=None):
    entries = FeedItemModel.query(FeedItemModel.feed_link == url)
    # ndb-style queries are immutable: order()/filter() return new query objects,
    # so the result must be reassigned for the ordering to take effect.
    entries = entries.order(FeedItemModel.published)
    # Filter on timestamps
    if min_timestamp is not None:
        parsed_min = parse_timestamp(min_timestamp)
        if parsed_min is not None:
            entries = entries.filter(FeedItemModel.timestamp > parsed_min)
    # Get individual items
    items = []
    for item in entries:
        print("Iterating an item:", item.title)
        items.append(feeditem_from_model(item))
    return items
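# Illustrative usage (not from the original source): fetch a feed's items,
# optionally keeping only those newer than a given timestamp. The URL and the
# timestamp format below are assumptions; min_timestamp just needs to be
# something the project's parse_timestamp helper understands.
recent_items = get_feeditems("http://example.com/rss.xml", min_timestamp="2015-06-01 00:00:00")
for feeditem in recent_items:
    print(feeditem)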
def update_db(self, _id, json_body):
    for note in json_body.get('notes', []):
        note.setdefault('author', self.uid)
        if 'timestamp' in note:
            note['timestamp'] = util.parse_timestamp(note['timestamp'])
        else:
            note['timestamp'] = datetime.datetime.utcnow()
    self.dbc.update({'_id': _id}, {'$set': util.mongo_dict(json_body)})
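# A hedged sketch (an assumption, not this project's actual util code) of what a
# mongo_dict helper typically does in this '$set' pattern: flatten a nested dict
# into dot-notation keys so the update only touches the listed fields instead of
# replacing whole sub-documents.
def mongo_dict(d, prefix=''):
    flat = {}
    for key, value in d.items():
        dotted = prefix + key
        if isinstance(value, dict):
            flat.update(mongo_dict(value, dotted + '.'))
        else:
            flat[dotted] = value
    return flat

# e.g. mongo_dict({'subject': {'code': 's001'}, 'notes': [...]})
#      -> {'subject.code': 's001', 'notes': [...]}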
def is_modified(vegobjekt):
    log = logging.getLogger("geofencebroker")
    next_date = parse_timestamp(vegobjekt["metadata"]["sist_modifisert"])
    table_vegobjekter = db.get_table("vegobjekter")
    geofence = table_vegobjekter.find_one(id=vegobjekt.get("id"))
    if not geofence:
        log.warn("vegobjekt not found in database")
        return False
    prev_date = parse_timestamp(geofence["sist_modifisert"])
    if next_date > prev_date:
        # 'vegobjekt' has been modified
        return True
    elif next_date < prev_date:
        log.warn("next_date < prev_date: (%s < %s)" % (next_date, prev_date))
        log.warn("Most likely a bug!!")
    # Equal timestamps (or the suspicious older-than-stored case): not modified
    return False
def post(self, sid):
    """Create a new Acquisition."""
    json_body = self._post()
    _id = bson.ObjectId(sid)
    session = self.app.db.sessions.find_one({'_id': _id}, ['permissions', 'public'])
    if not session:
        self.abort(404, 'no such session')
    if not self.superuser_request and util.user_perm(session['permissions'], self.uid).get('access') != 'admin':
        self.abort(400, 'must be session admin to create acquisition')
    json_body['session'] = _id
    json_body['permissions'] = session['permissions']
    json_body['public'] = session.get('public', False)
    json_body['files'] = []
    if 'timestamp' in json_body:
        json_body['timestamp'] = util.parse_timestamp(json_body['timestamp'])
    return {'_id': str(self.dbc.insert(json_body))}
def post(self, pid):
    """Create a new Session."""
    json_body = self._post()
    _id = bson.ObjectId(pid)
    project = self.app.db.projects.find_one({'_id': _id}, ['group', 'permissions', 'public'])
    if not project:
        self.abort(404, 'no such project')
    if not self.superuser_request and util.user_perm(project['permissions'], self.uid).get('access') != 'admin':
        self.abort(400, 'must be project admin to create session')
    json_body['project'] = _id
    json_body['group'] = project['group']
    json_body['permissions'] = project['permissions']
    json_body['public'] = project.get('public', False)
    json_body['files'] = []
    if 'timestamp' in json_body:
        json_body['timestamp'] = util.parse_timestamp(json_body['timestamp'])
    return {'_id': str(self.dbc.insert(json_body))}
def parse_js(js):
    # Note: relies on `self` from an enclosing scope, so this is presumably
    # defined as a nested helper inside a method.
    thumb_url = self.last_logo_url
    url = js['url']
    for _dict in js['image']:
        if _dict['size'] == 'extralarge':
            thumb_url = _dict['#text']
    # Replace 'png' with 'jpg' in the URL.
    # This is a hack to allow Discord embeds to work.
    thumb_url = thumb_url[:-3] + 'jpg'
    data = {}
    data['account_created'] = js['registered']['unixtime']
    data['account_created'] = util.parse_timestamp(data['account_created'])
    data['scrobbles'] = js['playcount']
    data['country'] = js['country']
    if data['country'] == '':
        del data['country']
    data['age'] = js['age']
    if data['age'] == '0':
        del data['age']
    return thumb_url, url, data
def demo():
    logger.info(color("bold_yellow") + "Trying to launch the updater and waiting for it to finish. If a newer version exists, it will kill this process, download the update files, and then restart the process... (please wait a moment)")

    dlc_path = os.path.realpath("auto_updater.py")

    p = subprocess.Popen(
        [
            dlc_path,
            "--pid", str(os.getpid()),
            "--version", str(now_version),
            "--cwd", os.getcwd(),
            "--exe_name", os.path.realpath("DNF蚊子腿小助手.exe"),
        ],
        cwd="utils",
        shell=True,
        creationflags=subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.DETACHED_PROCESS,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    p.wait()

    if p.returncode != 0:
        last_modify_time = parse_timestamp(os.stat(dlc_path).st_mtime)
        logger.error(f"The DLC failed with error code {p.returncode}; it was last modified at {last_modify_time}")

        uploader = Uploader()
        netdisk_latest_dlc_info = uploader.find_latest_dlc_version()
        latest_version_time = parse_time(netdisk_latest_dlc_info.time)

        if latest_version_time > last_modify_time:
            logger.info(
                f"The latest DLC on the netdisk was uploaded around {latest_version_time}, after the local version's last modification time {last_modify_time}; it may already fix this problem, so the DLC will be updated to the latest version"
            )
            uploader.download_file(netdisk_latest_dlc_info, "utils")
        else:
            logger.warning(f"The latest DLC on the netdisk was uploaded around {latest_version_time}, before the local version's last modification time {last_modify_time}. Please wait patiently for a new release that fixes this problem~")
def main():
    dirs = os.listdir(os.path.join(BASE_DIR, '_api'))
    if len(dirs) == 0:
        print("Springnote backup not found.")
        return
    elif len(dirs) == 1:
        subdomain = dirs[0]
    else:
        subdomain = None
        while subdomain not in dirs:
            subdomain = input("Choose ID ({}) : ".format(', '.join(dirs)))

    makedirs(os.path.join(DOKUWIKI_ROOT, subdomain, 'data', 'attic'), exist_ok=True)
    makedirs(os.path.join(DOKUWIKI_ROOT, subdomain, 'data', 'meta'), exist_ok=True)
    makedirs(os.path.join(DOKUWIKI_ROOT, subdomain, 'data', 'pages'), exist_ok=True)

    pages = _load(subdomain, 'pages')
    tree = E.pages()
    node_by_id = {}
    dangling_nodes = {}
    # pages = {_['page']['identifier']: _['page'] for _ in pages}  # No need yet
    print('0 / {} pages'.format(len(pages)))
    for i, entry in enumerate(pages):
        id_ = entry['page']['identifier']
        page = _load(subdomain, 'pages/{}'.format(id_))

        # tree
        if id_ not in node_by_id:
            node = E.page(id=str(id_))
            node_by_id[id_] = node
        elif id_ in dangling_nodes:
            node = node_by_id[id_]
            del dangling_nodes[id_]
        else:
            raise ValueError('Multiple pages with a same ID: {}'.format(id_))
        node.set('title', entry['page']['title'])
        parent_id = entry['page'].get('relation_is_part_of', None)
        if not parent_id:
            # the root
            tree.append(node)
        elif parent_id in node_by_id:
            node_by_id[parent_id].append(node)
        else:
            assert parent_id not in dangling_nodes
            parent = E.page(node, id=str(parent_id))
            node_by_id[parent_id] = dangling_nodes[parent_id] = parent

        # content
        xhtml = b'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html version="-//W3C//DTD XHTML 1.1//EN"
      xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.w3.org/1999/xhtml
                          http://www.w3.org/MarkUp/SCHEMA/xhtml11.xsd">
<head>
  <title>{title}</title>
</head>
<body>
{body}
</body>
</html>
'''.format(
            title=page['page']['title'].encode('utf8'),
            body=page['page']['source'].encode('utf8'))
        # TODO: save private pages in a different path
        _save(subdomain, 'pages/{}.xhtml'.format(id_), xhtml)

        # revisions
        revisions_data = _load(subdomain, 'pages/{}/revisions'.format(id_))
        if revisions_data:
            revisions = sorted(revisions_data,
                               key=lambda _: _['revision']['date_created'])
            data = []
            for k, revision in enumerate(revisions):
                description = revision['revision']['description']
                revision = _load(subdomain, 'pages/{}/revisions/{}'.format(
                    id_, revision['revision']['identifier']))
                timestamp = util.parse_timestamp(
                    revision['revision']['date_created'])
                data.append('\t'.join([
                    str(int(timestamp)),
                    '127.0.0.1',
                    'E' if k else 'C',  # for 'edit' and 'create' respectively
                    str(id_),
                    revision['revision']['creator'] or '',
                    description]))
                source = revision['revision']['source'] or ''
                path = os.path.join(DOKUWIKI_ROOT, subdomain, 'data', 'attic',
                                    '{}.{}.xhtml.gz'.format(id_, timestamp))
                with gzip.open(path, 'wb+') as f:
                    f.write(source.encode('utf8'))
            _save(subdomain, 'meta/{}.changes'.format(id_), '\n'.join(data))

        # TODO: attachments

        print(b'{} / {} pages'.format(i + 1, len(pages)))

    print('Saving tree structures...')
    lost_and_found = E.page(id='lost_and_found', title='Lost and Found')
    if dangling_nodes:
        for id_, node in dangling_nodes.items():
            lost_and_found.append(node)
        tree.append(lost_and_found)

    # NOTE: The tree is sorted by title by default; maybe we need to
    # double-check it
    for node in list(tree.getiterator()):
        if node.tag != 'page':
            continue
        path = []
        parent = node.getparent()
        while parent.tag == 'page':
            path.insert(0, parent.get('id'))
            parent = parent.getparent()
        node.set('path', ':'.join(path))

    new_tree = E.pages()
    for node in list(tree.getiterator()):
        if node.tag != 'page':
            continue
        new_tree.append(node)
    _save(subdomain, 'meta/_tree.xml',
          lxml.etree.tostring(new_tree, pretty_print=True, encoding='utf8'))

    # tree with public pages only
    for node in list(new_tree.getiterator()):
        if node.tag in ['pages']:
            continue
        id_ = node.get('id')
        if id_ in ['lost_and_found']:
            continue
        collaboration = _load(subdomain, 'pages/{}/collaboration'.format(int(id_)))
        # The page is public when any of the following is true:
        # - collaboration.json is an empty list
        # - There exists {"access_rights": "reader",
        #                 "rights_holder": "everybody", ...}
        is_public = not collaboration or any(
            _['collaboration']['access_rights'] == 'reader' and
            _['collaboration']['rights_holder'] == 'everybody'
            for _ in collaboration)
        if not is_public:
            for child in node:
                node.remove(child)
                lost_and_found.append(child)
            node.getparent().remove(node)

    # TODO: Sort by title?
    _save(subdomain, 'pages/_tree.xml',
          lxml.etree.tostring(new_tree, pretty_print=True, encoding='utf8'))
def do_classify(incoming_data, x_request_id):
    '''wrapper of classify'''
    result = {'code': 1, 'message': ''}
    logger.debug('<%s>, [do_classify] enter, incoming_data=%s' % (x_request_id, incoming_data))
    tag = incoming_data['tag']
    seq = incoming_data['seq']
    pois = incoming_data['pois']
    algo_type = incoming_data.get('algo_type', 'gmm')
    logger.info('<%s>, [classify gmm] params: tag=%s, seq=%s, algo_type=%s'
                % (x_request_id, tag, seq, algo_type))

    # parse seq
    seq = [util.parse_timestamp(e) for e in seq]

    # classify
    models = []
    for poi in pois:
        try:
            model = dao.get_model_by_tag_lable(algo_type, tag, poi)
        except IndexError:
            logger.error('cannot find model for tag=%s, label=%s' % (tag, poi))
            result['message'] = '<%s>, [do_classify] Cannot find model whose tag=%s, label=%s' % (x_request_id, tag, poi)
            raise ValueError(result['message'])
        models.append(model)

    _models = []
    labels = []
    for model in models:
        labels.append(model.get('eventLabel'))
        _model = {
            'nMix': model.get('nMix'),
            'covarianceType': model.get('covarianceType'),
            'nIter': model.get('nIter'),
            'count': model.get('count'),
            'params': model.get('params'),
        }
        _models.append(_model)

    my_predictor = Predictor(_models)
    logger.debug('-------\n\n_models:\n %s\n' % (_models))

    score_results = []
    seq_scores = my_predictor.scores(seq)
    logger.debug('seq_scores: %s' % (seq_scores))
    for scores in seq_scores:
        score_result = {}
        for index, score in enumerate(scores):
            if labels[index] in score_result:
                score_result[labels[index]] += score
            else:
                score_result[labels[index]] = score
        score_results.append(score_result)
        logger.debug('!!!!!! score_result:%s' % (score_result))

    # store seq in db
    for index, timestamp in enumerate(seq):
        event_label = max(score_results[index].iterkeys(),
                          key=lambda key: score_results[index][key])  # max prob key is label
        dao.save_train_data(timestamp, event_label)
        logger.info('<%s> [classify] store timestamp=%s, label=%s to db success'
                    % (x_request_id, timestamp, event_label))

    logger.info('<%s> [classify gmm] success' % (x_request_id))
    logger.debug('<%s> [classify gmm] result: %s' % (x_request_id, score_results))
    result['code'] = 0
    result['message'] = 'success'
    result['result'] = score_results
    return result
def test_parse_timestamp():
    assert parse_timestamp(1628222400.0) == now_for_test
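# A minimal sketch of a parse_timestamp that would satisfy the test above,
# assuming the helper converts a unix timestamp (seconds since the epoch, int
# or float) into a local-time datetime, and that now_for_test is a fixture
# pinned to that same instant. The project's real implementation may differ.
import datetime

def parse_timestamp(timestamp) -> datetime.datetime:
    # fromtimestamp interprets the value in the local timezone
    return datetime.datetime.fromtimestamp(float(timestamp))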
def download_file(self, fileinfo: FileInFolder, download_dir: str, overwrite=True, show_log=True,
                  download_only_if_server_version_is_newer=True) -> str:
    """
    Download the latest archive to the given directory and return the full path of the final archive.
    """
    make_sure_dir_exists(download_dir)
    download_dir = os.path.realpath(download_dir)
    target_path = StrWrapper(os.path.join(download_dir, fileinfo.name))

    if download_only_if_server_version_is_newer and os.path.isfile(target_path.value):
        # Only re-download when the server version is newer than the existing local file.
        # Lanzou cloud shows imprecise times, so shift the upload time back one minute. Otherwise, when the
        # same file is checked again later, its Lanzou time may show as "xx minutes ago", and parsing that
        # can be off by up to one minute, causing an unnecessary re-download.
        # For example, if this check/update runs at minute x second y and the next check at minute x+6
        # second y+10, the parsed Lanzou upload time would be minute x second y+10, triggering an extra
        # unnecessary download.
        server_version_upload_time = parse_time(fileinfo.time) - timedelta(minutes=1)
        local_version_last_modify_time = parse_timestamp(os.stat(target_path.value).st_mtime)

        get_log_func(logger.info, show_log)(
            f"{fileinfo.name} local modification time: {local_version_last_modify_time}, netdisk version upload time: {server_version_upload_time}"
        )

        if server_version_upload_time <= local_version_last_modify_time:
            # No newer version, no need to re-download
            get_log_func(logger.info, show_log)(
                color("bold_cyan")
                + f"The compare-modification-time option is enabled. The latest {fileinfo.name} on the netdisk was uploaded around {server_version_upload_time}, before the current version from {local_version_last_modify_time}, so no re-download is needed"
            )
            return target_path.value

    def after_downloaded(file_name):
        """Callback invoked after the download completes"""
        target_path.value = file_name
        get_log_func(logger.info, show_log)(f"Final downloaded file path: {file_name}")

    get_log_func(logger.info, show_log)(f"About to start downloading {target_path.value}")

    callback = None
    if show_log:
        callback = self.show_progress

    retCode = self.down_file_by_url(fileinfo.url, "", download_dir, callback=callback,
                                    downloaded_handler=after_downloaded, overwrite=overwrite)
    if retCode != LanZouCloud.SUCCESS:
        get_log_func(logger.error, show_log)(f"Download failed, retCode={retCode}")
        if retCode == LanZouCloud.NETWORK_ERROR:
            get_log_func(logger.warning, show_log)(
                color("bold_yellow")
                + ("The Lanzou cloud API returned a network error, most likely caused by DNS problems\n"
                   "Try opening the following two pages in a browser; does one open while the other does not?\n"
                   "https://fzls.lanzoux.com/s/djc-helper\n"
                   "https://fzls.lanzous.com/s/djc-helper\n"
                   "\n"
                   "If so, follow the link below to change this machine's DNS; using any of the Alibaba, Tencent, Baidu, or Google DNS servers should fix it.\n"
                   "https://www.ypojie.com/9830.html\n"
                   "\n"
                   "If neither opens, Lanzou cloud is most likely down -。- You can ignore the upcoming popup and keep running the old version, or manually download the latest version from the QQ group or GitHub")
            )
        raise Exception("Download failed")

    return target_path.value
def download_file_in_folder(self, folder: Folder, name: str, download_dir: str, overwrite=True, show_log=True,
                            try_compressed_version_first=False, cache_max_seconds=600,
                            download_only_if_server_version_is_newer=True) -> str:
    """
    Download the given file from the given netdisk folder into the local directory and return the full local path.
    """
    def _download(fname: str) -> str:
        return with_cache(
            cache_name_download,
            os.path.join(folder.name, fname),
            cache_max_seconds=cache_max_seconds,
            cache_miss_func=lambda: self.download_file(
                self.find_file(folder, fname),
                download_dir,
                overwrite=overwrite,
                show_log=show_log,
                download_only_if_server_version_is_newer=download_only_if_server_version_is_newer,
            ),
            cache_validate_func=lambda target_path: os.path.isfile(target_path),
        )

    if try_compressed_version_first:
        # Try the compressed version first
        compressed_filename = self.get_compressed_version_filename(name)

        try:
            get_log_func(logger.info, show_log)(color("bold_green") + f"Trying to download the compressed version {compressed_filename} first")

            # Record the last modification time before downloading
            before_download_last_modify_time = None
            old_compressed_filepath = os.path.join(download_dir, compressed_filename)
            if os.path.isfile(old_compressed_filepath):
                before_download_last_modify_time = parse_timestamp(os.stat(old_compressed_filepath).st_mtime)

            # Download the compressed version
            compressed_filepath = _download(compressed_filename)

            # Record the last modification time after the download completes
            after_download_last_modify_time = parse_timestamp(os.stat(compressed_filepath).st_mtime)

            # Decompress
            dirname = os.path.dirname(compressed_filepath)
            target_path = os.path.join(dirname, name)

            need_decompress = True
            if (before_download_last_modify_time is not None
                    and before_download_last_modify_time == after_download_last_modify_time
                    and os.path.exists(target_path)):
                # If the modification time did not change, no actual download took place (e.g. the netdisk
                # version matches the current local one). If the target file has already been decompressed,
                # skip decompression.
                need_decompress = False

            if need_decompress:
                decompress_file_with_lzma(compressed_filepath, target_path)
            else:
                get_log_func(logger.info, show_log)(
                    f"{compressed_filepath} did not change and the target file already exists, no need to decompress")

            # Return the path of the decompressed file
            return target_path
        except Exception as e:
            get_log_func(logger.error, show_log)(
                f"Failed to download the compressed version {compressed_filename}, will try the normal version~", exc_info=e)

    # Download the normal version
    return _download(name)
def main():
    if not os.path.exists(os.path.join(BASE_DIR, '_api')):
        print("Springnote backup not found.")
        return
    dirs = os.listdir(os.path.join(BASE_DIR, '_api'))
    if len(dirs) == 0:
        print("Springnote backup not found.")
        return
    elif len(dirs) == 1:
        subdomain = dirs[0]
    else:
        subdomain = None
        while subdomain not in dirs:
            subdomain = input("Choose ID ({}) : ".format(', '.join(sorted(dirs))))

    print("Initializing...")
    util.makedirs(SAVE_ROOT, exist_ok=True)
    pages = util.load_resource(subdomain, 'pages')

    print("Converting XHTML to MediaWiki...")
    output_paths = [
        os.path.join(SAVE_ROOT, '{}.xml'.format(subdomain)),
        os.path.join(SAVE_ROOT, '{}.private.xml'.format(subdomain))]
    output_fp = [open(_, 'w+') for _ in output_paths]
    for fp in output_fp:
        fp.write('<mediawiki>\n')

    id2title = {str(_['page']['identifier']): _['page']['title'] for _ in pages}
    n = 0
    titles = set()
    for entry in pages:
        id_ = entry['page']['identifier']
        fp = output_fp[0] if util.is_public(subdomain, id_) else output_fp[1]
        page = util.load_resource(subdomain, 'pages/{}'.format(id_))

        title = page['page']['title']
        new_title = title
        for k, v in MEDIAWIKI_NG.items():
            new_title = new_title.replace(k, v)
        j = 1
        new_title_ = new_title
        while new_title_ in titles:
            j += 1
            new_title_ = new_title + ' ({})'.format(j)
        new_title = new_title_
        titles.add(new_title)

        revisions_data = util.load_resource(subdomain, 'pages/{}/revisions'.format(id_))
        if revisions_data:
            revisions = sorted(revisions_data,
                               key=lambda _: _['revision']['date_created'])
        else:
            revisions = []

        fp.write('<page>\n')
        fp.write(u'<title>{}</title>\n'.format(escape(new_title)).encode('utf-8'))

        wiki_text = ''
        for revision in revisions:
            timestamp = util.parse_timestamp(revision['revision']['date_created'])
            timestamp = datetime.datetime.utcfromtimestamp(timestamp)\
                .strftime('%Y-%m-%dT%H:%M:%SZ')
            revision_data = util.load_resource(subdomain, 'pages/{}/revisions/{}'.format(
                id_, revision['revision']['identifier']))
            source = revision_data['revision']['source']
            wiki_text = (sabal2mediawiki(source, id2title) if source
                         else wiki_text)  # previous revision
            fp.write('<revision>\n')
            fp.write('<timestamp>{}</timestamp>\n'.format(timestamp))
            if revision['revision'].get('creator'):
                fp.write('<contributor><username>{}'
                         '</username></contributor>\n'.format(
                             escape(revision['revision']['creator'])))
            if revision['revision'].get('description'):
                fp.write(u'<comment>{}</comment>\n'
                         .format(escape(revision['revision']['description']))
                         .encode('utf-8'))
            fp.write(u'<text>{}</text>\n'.format(escape(wiki_text)).encode('utf-8'))
            fp.write('</revision>\n')

        fp.write('</page>\n')
        n += 1
        print(b"{} / {} pages - {}".format(n, len(pages), title.encode('utf8')))

    for fp in output_fp:
        fp.write('</mediawiki>')
        fp.close()

    print("Converted XML at:")
    for _ in output_paths:
        print(_)