def getText(nodelist):
    rc = ""
    for node in nodelist:
        try:
            rc += node.data.encode("utf8")
        except Exception, msg:
            getLogger().error(msg)
    return rc
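# Hypothetical usage sketch (not from the original source): getText() joins the
# text of minidom child nodes, as the RSS parser below uses it.
import xml.dom.minidom
dom = xml.dom.minidom.parseString("<rss><title>Example feed</title></rss>")
print getText(dom.getElementsByTagName("title")[0].childNodes)  # -> Example feed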
def handleStringfilter(org_string, filter):
    ret_text = org_string
    try:
        if filter.startswith("****"):
            # "**** <sep>": drop everything from the last occurrence of <sep> onwards
            sep = filter[4:].strip()
            if org_string.count(sep) > 0:
                ret_text = org_string[:org_string.rfind(sep)]
        elif filter.endswith("****"):
            # "<sep> ****": keep everything after the first occurrence of <sep>
            sep = filter[:-4].strip()
            if org_string.count(sep) > 0:
                ret_text = org_string[org_string.find(sep) + len(sep):].strip()
        elif filter.count("****") > 0:
            # "<a> **** <b>": keep the text between <a> and <b>
            pieces = filter.split("****")
            s_cur = org_string.find(pieces[0].strip())
            e_cur = org_string.rfind(pieces[1].strip())
            if s_cur >= 0 and e_cur > s_cur:
                ret_text = org_string[s_cur + len(pieces[0].strip()):e_cur]
        else:
            # "x;y;z": delete each listed piece from the string
            del_pieces = filter.split(";")
            for piece in del_pieces:
                org_string = org_string.replace(piece, "")
            ret_text = org_string
    except Exception, msg:
        getLogger().error(msg)
    return ret_text
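# Hypothetical usage sketch (not from the original source) of the "****" filter
# syntax handled above; the sample string and filters are made up:
sample = "HEADER | real content | FOOTER"
print handleStringfilter(sample, "| ****")                   # keeps the text after the first "|"
print handleStringfilter(sample, "**** |")                   # cuts from the last "|" to the end
print handleStringfilter(sample, "HEADER | **** | FOOTER")   # keeps the text between the two markers
print handleStringfilter(sample, "HEADER;FOOTER")            # deletes each ";"-separated piece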
def getRssInfo(rss):
    downLoader = Downloader()
    try:
        (t_url, header, html) = downLoader.open(rss)
        print "download ", rss
    except Exception, msg:
        getLogger().error("feed download error : %s %s", msg, rss)
        return None
def __init__(self):
    self.commands = CommandManager()
    self.event_manager = EventManager()
    self.logger = getLogger("Manager")
    self.plugman = PluginManager(self)
    self.yapsy_logger = getLogger("yapsy")
    self.metrics = None
def parse(self, contents, temp):
    # resultReturn
    result_dict = dict()
    for field in ["title", "link", "image", "generator", "language", "description", "writer"]:
        result_dict[field] = ""
    try:
        self.dom = xml.dom.minidom.parseString(contents)
        self.title = getText(self.dom.getElementsByTagName("title")[0].childNodes)
        result_dict["title"] = self.title
        if len(self.dom.getElementsByTagName("link")) > 0:
            result_dict["link"] = getText(self.dom.getElementsByTagName("link")[0].childNodes).strip()
        if len(self.dom.getElementsByTagName("image")) > 0:
            result_dict["image"] = getText(
                self.dom.getElementsByTagName("image")[0].getElementsByTagName("url")[0].childNodes
            )
        if len(self.dom.getElementsByTagName("generator")) > 0:
            result_dict["generator"] = getText(self.dom.getElementsByTagName("generator")[0].childNodes)
            if result_dict["generator"].find("wordpress") >= 0:
                return self.parseWordPress(contents)
            if result_dict["generator"].lower().find("blogger") >= 0:
                return self.parseBlogspot(contents)
        if len(self.dom.getElementsByTagName("language")) > 0:
            result_dict["language"] = getText(self.dom.getElementsByTagName("language")[0].childNodes)
        if len(self.dom.getElementsByTagName("description")) > 0:
            result_dict["description"] = getText(self.dom.getElementsByTagName("description")[0].childNodes)
        try:
            if len(self.dom.getElementsByTagName("managingEditor")) > 0:
                result_dict["writer"] = getText(self.dom.getElementsByTagName("managingEditor")[0].childNodes)
            elif len(self.dom.getElementsByTagName("webMaster")) > 0:
                result_dict["writer"] = getText(self.dom.getElementsByTagName("webMaster")[0].childNodes)
            else:
                tt_list = self.dom.getElementsByTagName("author")
                try:
                    if len(tt_list) > 0:
                        writer = getText(tt_list[0].getElementsByTagName("name")[0].childNodes)
                        if writer != "":
                            result_dict["writer"] = writer
                        tt_node = self.dom.getElementsByTagName("author")[0].getElementsByTagName("gd:image")[0]
                        image = tt_node.attributes["src"].value.encode("utf8")
                        if image != "":
                            result_dict["image"] = image
                except Exception, msg:
                    pass
        except Exception, msg:
            getLogger().error(msg)
    except Exception, msg:
        getLogger().error(msg)
def getTistoryId(url):
    downLoader = Downloader()
    attr_dict = dict()
    attr_dict["tid"] = "livere_blogurl = '****.tistory.com';"
    attr_dict["tid2"] = """__addParam("author","****");"""
    try:
        (t_url, header, html) = downLoader.open(url)
        print "download", url
    except Exception, msg:
        getLogger().error("feed download error : %s %s", msg, url)
        return None
def getDBConnection(host, usr, pwd, db, cursor_type="normal"):
    db_connect = None
    db_cursor = None
    try:
        if cursor_type == "dict":
            db_connect = MySQLdb.connect(host, usr, pwd, db, cursorclass=MySQLdb.cursors.DictCursor)
        else:
            db_connect = MySQLdb.connect(host, usr, pwd, db)
        db_connect.set_character_set('utf8')
        db_cursor = db_connect.cursor()
    except Exception, msg:
        getLogger().error("getDBCursor() Failed : %s" % msg)
    # both may be None if the connection failed
    return db_connect, db_cursor
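# Hypothetical usage sketch (not from the original source); the host, credentials
# and table are placeholders, and the (connection, cursor) return value is assumed:
connection, cursor = getDBConnection("localhost", "crawler", "secret", "feeds", cursor_type="dict")
if cursor is not None:
    cursor.execute("SELECT COUNT(*) AS cnt FROM documents")
    print cursor.fetchone()
    connection.close()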
def makeOutputDict(document_data):
    try:
        if document_data.parsing_result:
            result_dict = document_data.parsing_result
        else:
            result_dict = dict()
        result_dict["mode"] = document_data.mode
        result_dict["type"] = document_data.type
        result_dict["guid"] = document_data.guid
        result_dict["crawlTime"] = document_data.crawl_time
        if "body" in result_dict:
            result_dict["body"] = result_dict["body"].replace("]", "]")
        if "bodyHtml" in result_dict:
            result_dict["bodyHtml"] = result_dict["bodyHtml"].replace("]", "]")
        if "title" in result_dict:
            result_dict["title"] = " ".join(result_dict["title"].replace("]", "]").split())
        result_dict["webLink"] = document_data.down_url
        result_dict["mobileLink"] = document_data.mobile_url
        if document_data.type == "NEWS":
            result_dict["channelName"] = document_data.domain_data.name
            result_dict["channelIdentifier"] = document_data.domain_data.url
            result_dict["sourceType"] = 4
            try:
                image_data = document_data.image_data
                if "78x78" in image_data:
                    result_dict["imageThumbnail78x78"] = image_data["78x78"]
                if "126x126" in image_data:
                    result_dict["imageThumbnail126x126"] = image_data["126x126"]
                if "signature" in image_data:
                    result_dict["imageThumbnailSignature"] = image_data["signature"]
            except Exception, msg:
                getLogger().error(msg)
        elif document_data.type == "BBS":
            for int_field in ["readCount", "replyCount", "recommendCount", "videoCount", "imageCount"]:
                if int_field not in result_dict:
                    result_dict[int_field] = 0
            result_dict["siteName"] = document_data.domain_data.name
            result_dict["siteIdentifier"] = document_data.domain_data.url
        outLinks = list()
        for link in result_dict["bodyLinks"]:
            l_data = result_dict["bodyLinks"][link]
            outLinks.append("%s\t%s" % (link, l_data.text))
        result_dict["outLinks"] = "\n".join(outLinks)
        return result_dict
    except Exception, msg:
        getLogger().error(msg)
def sendData(self, data, sc="NEWS"):
    try:
        if sc != "NEWS":
            return "NO SERVICE"
        res = self.producer.send_messages(sc, data)
        if str(res).find("error=0") >= 0:
            return "OK"
        else:
            getLogger().error(str(res))
            return "ERROR"
    except Exception, msg:
        getLogger().error(msg)
        return "ERROR"
def __init__(self, _outputDirPath=os.getcwd(), _documentCountLimit=1000):
    self.fieldListDic = dict()
    self.outputDirPath = _outputDirPath
    self.documentCountLimit = _documentCountLimit
    self.logger = getLogger()
    self.setupOutputDir()
    self.initSCFieldListDic()
def __init__(self): self.log = getLogger("Updates") self.current = current self.current_v = StrictVersion(current) self.load_release() self.do_warnings()
def _getRules(self, url, verbose=False):
    """
    Returns the RobotTextRules object for url (site-level or dir-level).
    First: use the internal cache.
    Second: use memcache.
    Third: download robots.txt and parse it.
    """
    logger = log.getLogger()
    # 1. use the stored robots dictionary cache (site-level)
    robots_site_path = urlparse.urljoin(url, "/robots.txt")
    if robots_site_path in self.robots:
        if verbose:
            logger.info("robotstxt in local memory: %s", robots_site_path)
        return self.robots[robots_site_path]
    # 2. use memcache
    rules = None
    try:
        # 3. download robots.txt (site-level first)
        rules = self._parsingRobotsFile(robots_site_path)
        if verbose:
            logger.info("robotstxt downloaded: %s: %s", rules.return_code, robots_site_path)
        self.robots[robots_site_path] = rules
    except:
        pass
    return rules
def isOldImage(chk_api, hash_key):
    try:
        cmd = "curl %s --connect-timeout 5 --max-time 10 " % chk_api
        exist = True
        fd = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        for line in fd.stdout.readlines():
            results = line.strip()
            if results.find("404") >= 0:
                exist = False
                break
            if results.find(hash_key.upper()) >= 0:
                getLogger().info("same hash")
                return True
    except Exception, msg:
        getLogger().error(msg)
    # no matching hash found (assumed fall-through)
    return False
def parse(self, header, html, url, parser_id=None):
    if self.prm == None:
        self.setRules()
    ret_dict = self.parser.plugParser(header, html, url)
    result_dict = dict()
    if parser_id != None and parser_id in self.prm.id_dict:
        try:
            host_rule = self.prm.id_dict[parser_id]
            result_dict = self.getDataByRule(host_rule, ret_dict, url)
            result_dict["parser_id"] = parser_id
            return result_dict
        except Exception, msg:
            getLogger().error(msg)
def uploadImage(upload_url, file):
    retry = 0
    results = ""
    m_t = time.time()
    while retry < 3:
        try:
            cmd = "curl --upload-file %s %s --connect-timeout 5 --max-time 10 --header 'Expect:' " % (
                file, upload_url + "/fileext/jpg")
            fd = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            for line in fd.stdout.readlines():
                results = line.strip()
                if results.startswith("OK"):
                    e_t = time.time()
                    getLogger().info("%s upload time : %s", upload_url, e_t - m_t)
                    return "OK"
        except Exception, msg:
            getLogger().error(msg)
        retry += 1
    # all retries exhausted: return the last curl output (assumed behaviour)
    return results
def downloadImage(img_url):
    rq = mechanize.Request(img_url)
    try:
        rs = mechanize.urlopen(rq)
        http_content = rs.read()
        header = rs.info()
        return http_content
    except Exception, msg:
        try:
            getLogger().error("%s %s", img_url, msg)
            time.sleep(1)
            rs = mechanize.urlopen(rq)
            http_content = rs.read()
            return http_content
        except Exception, msg:
            getLogger().error("%s %s", img_url, msg)
            return str(msg)
def __init__(self): self.logger = getLogger("Help") self.add_topic( AliasListTopic("aliases", HelpTopicType.GenericTopic) ) self.add_topic( ComandListTopic("commands", HelpTopicType.GenericTopic) )
def __init__(self, filename):
    self.callbacks = []
    self.logger = getLogger("YamlConfig")
    # Some sanitizing here to make sure people can't escape the config dirs
    filename = filename.strip("..")
    self.filename = filename
    self.exists = self.reload(False)
def __init__(self, factory_manager=None, path="./plugins", module="plugins"):
    if factory_manager is None:
        raise ValueError("Factory manager cannot be None!")
    self.log = getLogger("Plugins")
    self.factory_manager = factory_manager
    self.module = module
    self.path = path
def makeUrlFromPattern(patterns, ret_key_dict):
    try:
        for ret_url in patterns:
            anypath_str = ""
            for kk in ret_key_dict:
                if (kk in INT_KEY or kk.startswith("INT_")) and not isInt(ret_key_dict[kk]):
                    break
                if kk == "ANYPATH" and ret_key_dict[kk].strip() == "":
                    ret_url = ret_url.replace("/(ANYPATH)/", "/")
                else:
                    ret_url = ret_url.replace("(" + kk + ")", ret_key_dict[kk])
                    if kk == "ANYPATH":
                        anypath_str = "/" + ret_key_dict[kk]
            if ret_url.count("(") == 0 and ret_url.count(")") == 0:
                ret_url = quote(ret_url.strip(), safe=RESERVED)
                ret_url = ret_url.replace("://m.www.", "://m.").replace(anypath_str, "")
                return ret_url
    except Exception, msg:
        getLogger().error(msg)
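# Hypothetical usage sketch (not from the original source): patterns contain
# "(KEY)" placeholders that makeUrlFromPattern() fills in from ret_key_dict.
# INT_KEY, isInt, quote and RESERVED are assumed to be defined elsewhere in the module.
patterns = ["http://www.example.com/news/read?id=(INT_ID)"]
print makeUrlFromPattern(patterns, {"INT_ID": "1024"})
# -> http://www.example.com/news/read?id=1024, provided RESERVED keeps ":/?=" unescaped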
def writeEachDocumentData(self, f, dataDic, type):
    try:
        if type == "test":
            f.write("%s\t%s\t%s\n" % (dataDic["guid"], dataDic["title"], dataDic["bodyHtml"]))
        else:
            keyList = None
            if "type" in dataDic:
                scType = dataDic["type"]
                if scType in self.fieldListDic:
                    keyList = self.fieldListDic[scType]
            if keyList:
                # field names specified: write only those fields
                for field in keyList:
                    if field in dataDic and dataDic[field] and dataDic[field] != "":
                        f.write(" <%s><![CDATA[%s]]></%s>\n" % (field, dataDic[field], field))
            else:
                # no field names specified: write every field
                for key, val in dataDic.items():
                    f.write(" <%s><![CDATA[%s]]></%s>\n" % (key, val, key))
    except Exception, msg:
        getLogger().error("%s %s FILE WRITE ERROR", msg, dataDic)
def __init__(self, name, factory, config):
    NoChannelsProtocol.__init__(self, name, factory, config)
    self.log = getLogger(self.name)
    self.event_manager = EventManager()
    self.command_manager = CommandManager()
    reactor.connectTCP(
        self.config["connection"]["host"],
        self.config["connection"]["port"],
        self.factory,
        120
    )
def __init__(self, init_dic):
    self.info_dic = dict()
    self.logger = getLogger()
    if self.isValidInfo(init_dic):
        self.info_dic = init_dic
    else:
        self.logger.error("Invalid init information")
        exit(1)
    self.url_parser = URLParser()  # the most basic URL parser
    # dictionary holding every pattern entry:
    # pattern_dic["normal" | "host_key"][domain] = [(priority, URLData()), ...]
    self.pattern_dic = {"normal": dict(), "host_key": dict()}
    # reserved words extracted from a URL can be used to build several other URLs
    self.build_pt_dic = dict()
def __init__(self, protocol_name, config, manager):
    self.logger = getLogger("F: %s" % protocol_name)
    self.config = config
    self.manager = manager
    self.name = protocol_name
    self.ptype = config["main"]["protocol-type"]
    self.protocol_class = None
    self.protocol = None
    manager_config = manager.main_config
    reconnections = manager_config["reconnections"]
    self.r_delay = int(reconnections["delay"])
    self.r_attempts = int(reconnections["attempts"])
    self.r_on_drop = reconnections["on-drop"]
    self.r_on_failure = reconnections["on-failure"]
    self.r_reset = reconnections["reset-on-success"]
def __init__(self, filename): self.callbacks = [] self.logger = getLogger("Data") filename = filename.strip("..") folders = filename.split("/") folders.pop() folders = "/".join(folders) if not os.path.exists(folders): os.makedirs(folders) self.filename = filename self.reload(False)
def __init__(self, path, *args, **kwargs): self.callbacks = [] self.logger = getLogger("Redis") self.path = path self.url = kwargs.get("url", None) self.logger.trace("Path: %s" % path) self.logger.trace("Args: %s" % (args or "[]")) self.logger.trace("KWArgs: %s" % (kwargs or "{}")) self.args = args self.kwargs = kwargs self.reconnect()
def isDisallowSite(self, url, verbose=False):
    """
    Return True if robots.txt contains the following directives:
        User-agent: * or zumbot
        Disallow: /
    """
    logger = log.getLogger()
    self.delay = 3
    robots_site_path = urlparse.urljoin(url, "/robots.txt")  # site-level robots.txt
    # download and parse robots.txt (site-level first)
    self.blocked = False
    rules = self._parsingRobotsFile(robots_site_path)
    if self.blocked:
        return True, self.delay
    else:
        return False, self.delay
def __init__(self):
    if not sys.stdout.isatty() or "--no-console" in sys.argv:
        self.wrapped = False
        return
    self.logger = getLogger("Console")
    self.old_stdout = sys.stdout
    self.old_stderr = sys.stderr
    self.wrapper = Wrapper(self)
    self.wrapper_err = WrapperErr(self)
    self.reader = Reader(self)
    # We set it here
    sys.stdout = self.wrapper
    sys.stderr = self.wrapper_err
def __init__(self, name, factory, config):
    self.name = name
    self.factory = factory
    self.config = config
    self.received = ""
    self.log = getLogger(self.name)
    self.log.info("Setting up..")
    self.command_manager = CommandManager()
    self.event_manager = EventManager()
    self.username = config["identity"]["username"]
    self.password = config["identity"]["password"]
    self.networking = config["network"]
    self.tokens = config["identity"]["tokens"]
    self.control_chars = config["control_chars"]
    audio_conf = config.get("audio", {})
    self.should_mute_self = audio_conf.get("should_mute_self", True)
    self.should_deafen_self = audio_conf.get("should_deafen_self", True)
    event = general_events.PreConnectEvent(self, config)
    self.event_manager.run_callback("PreConnect", event)
    context = self._get_client_context()
    if context is None:
        # Could not create a context (problem loading the cert file)
        self.factory.manager.remove_protocol(self.name)
        return
    reactor.connectSSL(
        self.networking["address"],
        self.networking["port"],
        self.factory,
        context,
        120
    )
    event = general_events.PostConnectEvent(self, config)
    self.event_manager.run_callback("PostConnect", event)
def __init__(self, factory, config):
    self.factory = factory
    self.config = config
    self.log = getLogger("TS3")
    self.log.info("Setting up..")
    self.server = config["server"]
    self.identity = config["identity"]
    self.user = self.identity["username"]
    self.passw = self.identity["password"]
    self.sid = self.server["sid"]
    reactor.connectTCP(
        self.server["address"],
        self.server["port"],
        self.factory,
        120
    )
def __init__(self, path, *args, **kwargs): self.callbacks = [] self.logger = getLogger("DBAPI") path = path.replace("//", "/") path = path.split("/", 1)[1] self.path = path self.logger.trace("Path: %s" % path) self.logger.trace("Args: %s" % (args or "[]")) self.logger.trace("KWArgs: %s" % (kwargs or "{}")) parsed_module = path.split(":", 1)[0] self.parsed_module = parsed_module self.args = args self.kwargs = kwargs self.logger.debug(_("Parsed module: %s") % parsed_module) self.reconnect()
def set_language(self, lang=None, mlang=None):
    if lang is None:
        lang = DEFAULT
    if mlang is None:
        mlang = DEFAULT
    self.get_known()
    if self.log and self.logger is None:
        from utils.log import getLogger
        self.logger = getLogger("Translations")
    if lang not in self.known:
        if self.logger is None:
            print "Unknown language '%s', defaulting to '%s'" \
                % (lang, DEFAULT)
        else:
            self.logger.warn("Unknown language '%s', defaulting to '%s'"
                             % (lang, DEFAULT))
        lang = DEFAULT
    if mlang not in self.known:
        if self.logger is None:
            print "Unknown language '%s', defaulting to '%s'" \
                % (mlang, DEFAULT)
        else:
            self.logger.warn("Unknown language '%s', defaulting to '%s'"
                             % (mlang, DEFAULT))
        mlang = DEFAULT
    self.language = lang
    self.m_language = mlang
    self.reload()
def __init__(self): self.logger = getLogger("Permissions") self.confdir = tmpdir + "/config/" self.datadir = tmpdir + "/data/" try: os.makedirs(self.confdir) os.makedirs(self.datadir) self.logger.debug("Config and data dirs created.") except Exception: pass yaml.dump({"editor_warning": False}, open(self.confdir + "settings.yml", "w")) self.storage = StorageManager(self.confdir, self.datadir) self.data = self.storage.get_file(self, "data", formats.YAML, "permissions.yml") self.handler = permissionsHandler(self, self.data) super(PluginObject, self).__init__()
import logging
import os
import pickle
from pprint import pprint

import numpy as np
import pandas as pd
import zerorpc
from sklearn.linear_model import SGDClassifier
from sqlalchemy import update

from model.db import DB_ENGINE, rawcontents
from utils.log import getLogger

logger = getLogger('semiTrain')


def fetchAllData(threshold):
    return pd.read_sql(
        'SELECT rid, tag, assure FROM rawcontents WHERE LENGTH(content) > {}'.format(threshold),
        DB_ENGINE)


def randomSelectData(data, count):
    inds = np.arange(len(data))
    np.random.shuffle(inds)
    return data.iloc[inds[:count]].copy()


def completeTrainData(current):
    stmt = 'SELECT rid, content, vector FROM rawcontents WHERE rid IN {}'
def __init__(self, data_dict): self.callbacks = [] self.logger = getLogger("Data") self.data = data_dict
def __init__(self): self.log = getLogger("GetchUnix") import tty import sys self.log.trace(_("Loaded: %s, %s") % (tty, sys))
from collections import namedtuple

from sqlalchemy import MetaData, create_engine
from sqlalchemy.schema import (Column, ForeignKey, ForeignKeyConstraint, Index,
                               PrimaryKeyConstraint, Table, UniqueConstraint)
from sqlalchemy.types import BLOB, Boolean, DateTime, Float, Integer, String

from settings import DB_ENGINE_FILE, ECHO_DATABASE_INFO
from utils.log import getLogger

logger = getLogger('db')

# models
DB_ENGINE = create_engine('sqlite:///{}'.format(DB_ENGINE_FILE),
                          echo=ECHO_DATABASE_INFO)
metadata = MetaData()

posts = Table(
    'posts', metadata,
    Column('pid', Integer, autoincrement=True),
    Column('blockid', String, nullable=False, comment='board id'),
    Column('postid', Integer, nullable=False, comment='post id'),
    Column('title', String, nullable=False, comment='post title'),
    Column('pageurl', String, nullable=False, comment='URL of the post front page'),
    Column('subType', String, comment='post subtype'),
    Column('activityuserid', Integer, nullable=False, comment='original poster (OP) id'),
    Column('clickcount', Integer, nullable=False, comment='click count'),
    Column('replycount', Integer, nullable=False, comment='reply count'),
    Column('remarkcount', Integer, nullable=False, comment='number of OP comments'),
    Column('imgcount', Integer, nullable=False, comment='image count'),
def __init__(self):
    self.token_regex = re.compile(r"\{[^}]*\}")
    self.parse_regex = re.compile(r"(?<!\\):")
    self.escape_regex = re.compile(r"\\:")
    self.logger = getLogger("Tokens")
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.test.utils import common_texts
from sqlalchemy import select

from model.db import DB_ENGINE, posts, rawcontents
from utils.log import getLogger

logger = getLogger('doc2vec')

# init
with DB_ENGINE.connect() as conn:
    s = select([posts.c.title])
    documents = [
        TaggedDocument(doc, [pid])
        for pid, doc in enumerate(conn.execute(s))
    ]

model = Doc2Vec(documents, vector_size=2048, window=5, min_count=1, workers=4)
model.save('RuntimeTY/d2v_2048_5_1216')

model = Doc2Vec.load('RuntimeTY/d2v_2048_5_1216')
logger.critical('Loaded')

with DB_ENGINE.connect() as conn:
    s = select([rawcontents])
    buffer = []
    for row in conn.execute(s):
        rid = row[rawcontents.c.rid]
def __init__(self): self.logger = getLogger("Help") self.add_topic(AliasListTopic("aliases", HelpTopicType.GenericTopic)) self.add_topic(ComandListTopic("commands", HelpTopicType.GenericTopic))
def __init__(self): self.log = getLogger("GetchWindows") import msvcrt self.log.trace(_("Loaded: %s") % msvcrt)
import logging
import re
from urllib import parse

import demjson

from model.db import Reply
from utils.datetime import parseDatetimeString
from utils.log import getLogger
from utils.request import getSoup, with_max_retries

logger = getLogger('parser', logging.INFO)


class FetchPostFailed(BaseException):
    pass


# app
@with_max_retries(3, 10)
def extractAll(blockid, postid):
    url = urlFactory(blockid, postid, 1)
    soup = getPage(url)
    if soup is None:
        return None
    bbsGlobal = extractBBSGlobal(soup)
    # '本版隐藏' means "hidden on this board"
    if bbsGlobal['isWenda'] or bbsGlobal['subType'] == '本版隐藏':
        logger.info('Got subtype with {} in {}'.format(bbsGlobal['subType'],
import os
import pickle

import numpy as np
import pandas as pd
from sklearn import ensemble, svm
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score

from model.db import DB_ENGINE
from utils.log import getLogger

logger = getLogger('predict')

labeled = pd.read_sql(
    'SELECT rid, tag, vector FROM rawcontents WHERE assure>0.5', DB_ENGINE)
train = labeled.groupby('vector')['tag'].mean()
X = list(pd.Series(train.index).apply(pickle.loads))
y = list(train.values)

logger.info('Build the model')
clf = svm.SVC(kernel='rbf')

logger.info('Fitting...')
clf.fit(X, y)

# DATA_ROOT is expected to be defined or imported elsewhere in this module
with open(DATA_ROOT / 'SVC_rbf_model.bin', 'wb') as f:
    pickle.dump(clf, f)
import configparser
import logging
import time

import pymysql
from argon2 import PasswordHasher

from classes.User import User
from classes.PSAlbum import PSAlbum
from utils.log import getConsoleHandler, getFileHandler, getLogger

psLogger = getLogger(__name__, "logs/photoshare.log")
psLogger.debug("Loading DBConnection class")


class dbConnection:
    USERNAME = ''
    PASSWORD = ''
    HOST = ''
    DATABASE_NAME = ''
    CHARSET = ''
    SQL_CONNECTION = ''

    def __init__(self, settings):
        self.HOST = settings.get('SQL', 'host')
        self.USERNAME = settings.get('SQL', 'user')
        self.PASSWORD = settings.get('SQL', 'password')
        self.DATABASE_NAME = settings.get('SQL', 'dbName')
        self.CHARSET = settings.get('SQL', 'charset')
import json

from utils.log import getLogger, logging
from utils.request import fetchJson, with_max_retries

logger = getLogger('reward', logging.INFO)


class FetchRewardInfoFailed(BaseException):
    pass


# app
def fetchRewardInfo(bbsGlobal):
    form = {
        'method': 'bbs.api.getArticleDashangInfo',
        'params.item': bbsGlobal['item'],
        'params.articleId': bbsGlobal['artId'],
    }
    for kform, kglob in {
            'params.rewardIds': 'tyfen_rewardIds',
            'params.tyfIds': 'tyfen_tyfIds',
            'params.shangIds': 'shangIds'
    }.items():
        if len(bbsGlobal[kglob]) > 0 and bbsGlobal[kglob][0] == '0':
            form[kform] = bbsGlobal[kglob]
        else:
            form[kform] = '0,' + bbsGlobal[kglob]
def __init__(self, data_dict): self.callbacks = [] self.logger = getLogger("MemoryConfig") self.exists = True self.data = data_dict
Author: Hai Liang Wang <*****@*****.**>
'''
import os
import shutil
from time import localtime, strftime

import tensorflow as tf
from munch import munchify
from tqdm import tqdm

from config import Config
from dataset.textdata import TextData
from models.rnn import Model
from utils import log

config = Config()
logger = log.getLogger(__name__)


def main(unused_argv):
    batch_data = TextData(
        munchify({
            'rootDir': config.root_dir,
            'corpus': config.corpus_name,
            'maxLength': config.train_max_length,
            'maxLengthEnco': config.train_max_length_enco,
            'maxLengthDeco': config.train_max_length_deco,
            'datasetTag': '',
            'test': False,
            'watsonMode': False,
            'batchSize': config.train_num_batch_size
        }))
import json
import logging
import time
from functools import wraps

import requests
from bs4 import BeautifulSoup

from utils.log import getLogger

logger = getLogger('request', logging.INFO)

session = requests.Session()
session.headers['User-Agent'] = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36')


# functions
def lauchRequest(req_func, ret_func, sleep_time):
    def func(*args, sleep_time=sleep_time):
        time.sleep(sleep_time)
        rsp = req_func(*args)
        logger.debug('Return {} from {} {}'.format(rsp.status_code,
                                                   rsp.request.method, rsp.url))
        return ret_func(rsp), rsp

    return func
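# Hypothetical usage sketch (not part of the original module): lauchRequest()
# pairs a raw request callable with a response-translation callback and a
# per-call delay. getJson and its one-second delay are made up for illustration.
getJson = lauchRequest(session.get, lambda rsp: rsp.json(), 1)

if __name__ == '__main__':
    data, rsp = getJson('https://httpbin.org/json')
    logger.info('Fetched %d top-level keys', len(data))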