コード例 #1
0
ファイル: rss_parser.py プロジェクト: Shiwoo-Park/glance
def getText(nodelist):
    """Accumulate the UTF-8 encoded ``data`` of every node in ``nodelist``.

    A node whose ``data`` cannot be read or encoded is reported through
    ``getLogger().error`` and skipped; the remaining nodes are still processed.

    NOTE(review): no ``return`` is visible in this excerpt -- presumably
    ``rc`` is returned right after the loop; confirm.
    """
    rc = ""
    for node in nodelist:
        try:
            rc += node.data.encode("utf8")
        except Exception, msg:
            # Best effort: one bad node must not abort the whole concatenation.
            getLogger().error(msg)
コード例 #2
0
ファイル: custom_parser.py プロジェクト: Shiwoo-Park/glance
def handleStringfilter(org_string, filter):
	"""Trim ``org_string`` according to a small "****"-wildcard filter language.

	Supported ``filter`` shapes:
	  * ``****SEP``      -- keep everything before the LAST occurrence of SEP
	  * ``SEP****``      -- keep everything after the FIRST occurrence of SEP (stripped)
	  * ``HEAD****TAIL`` -- keep the text between the first HEAD and the last TAIL
	  * anything else    -- treat ``filter`` as a ";"-separated list of substrings
	                        to delete from ``org_string``

	When a separator is not found the text is left unchanged. Any exception is
	logged through ``getLogger().error``.

	NOTE(review): the excerpt ends without returning ``ret_text`` -- presumably
	the return follows; confirm.
	"""

	ret_text = org_string

	try:
		if filter.startswith("****"):
			# Wildcard first: cut off the trailing part starting at the last SEP.
			sep = filter[4:].strip()
			if org_string.count(sep) > 0:
				ret_text = org_string[:org_string.rfind(sep)]
		elif filter.endswith("****"):
			# Wildcard last: cut off the leading part up to and including the first SEP.
			sep = filter[:-4].strip()
			if org_string.count(sep) > 0:
				ret_text = org_string[org_string.find(sep)+len(sep):].strip()
		elif filter.count("****") > 0:
			# Wildcard in the middle: only pieces[0] and pieces[1] are used.
			pieces = filter.split("****")
			s_cur = org_string.find(pieces[0].strip())
			e_cur = org_string.rfind(pieces[1].strip())
			if s_cur >=0 and e_cur > s_cur:
				ret_text = org_string[s_cur+len(pieces[0].strip()):e_cur]
		else:
			# No wildcard: every ";"-separated piece is removed wherever it occurs.
			del_pieces = filter.split(";")
			for piece in del_pieces:
				org_string = org_string.replace(piece, "")
			ret_text = org_string
	except Exception, msg:
		getLogger().error(msg)
コード例 #3
0
ファイル: feedFinder.py プロジェクト: Shiwoo-Park/glance
def getRssInfo(rss):
	"""Download the feed at ``rss``; only the download step is visible here.

	Returns None, after logging the error together with the URL, when
	``Downloader.open`` raises.
	"""
	downLoader = Downloader()
	try:
		# open() yields a 3-tuple that is unpacked as (url, header, html).
		(t_url, header, html) = downLoader.open(rss) 
		print "download ", rss
	except Exception, msg:
		getLogger().error("feed download error : %s %s", msg, rss)
		return None
コード例 #4
0
ファイル: factory_manager.py プロジェクト: NotAFile/Ultros
    def __init__(self):
        """Create the command/event/plugin managers and the two loggers."""
        self.commands = CommandManager()
        self.event_manager = EventManager()
        self.logger = getLogger("Manager")
        # PluginManager is handed this instance; whether it reads the
        # attributes set above during construction is not visible here.
        self.plugman = PluginManager(self)
        self.yapsy_logger = getLogger("yapsy")

        # Starts out unset; nothing in this excerpt assigns a metrics object.
        self.metrics = None
コード例 #5
0
ファイル: factory_manager.py プロジェクト: gsingh123/Ultros
    def __init__(self):
        """Create the command/event/plugin managers and the two loggers."""
        self.commands = CommandManager()
        self.event_manager = EventManager()
        self.logger = getLogger("Manager")
        # PluginManager is handed this instance; whether it reads the
        # attributes set above during construction is not visible here.
        self.plugman = PluginManager(self)
        self.yapsy_logger = getLogger("yapsy")

        # Starts out unset; nothing in this excerpt assigns a metrics object.
        self.metrics = None
コード例 #6
0
ファイル: rss_parser.py プロジェクト: Shiwoo-Park/glance
    def parse(self, contents, temp):
        """Parse the feed XML in ``contents`` and collect channel-level metadata.

        Builds ``result_dict`` with the keys title, link, image, generator,
        language, description and writer, each defaulting to "". Feeds whose
        generator text mentions "wordpress" or "blogger" are handed over to
        ``parseWordPress`` / ``parseBlogspot`` and their result is returned.

        Any exception is logged through ``getLogger().error``. ``temp`` is not
        used in the visible part.

        NOTE(review): the excerpt ends without returning ``result_dict`` --
        presumably the return follows; confirm.
        """
        # Result dictionary, pre-filled so every key is always present.
        result_dict = dict()

        for field in ["title", "link", "image", "generator", "language", "description", "writer"]:
            result_dict[field] = ""

        try:
            self.dom = xml.dom.minidom.parseString(contents)
            # The first <title> in the document is required; a feed without one
            # raises here and ends up in the outer handler.
            self.title = getText(self.dom.getElementsByTagName("title")[0].childNodes)

            result_dict["title"] = self.title

            if len(self.dom.getElementsByTagName("link")) > 0:
                result_dict["link"] = getText(self.dom.getElementsByTagName("link")[0].childNodes).strip()

            if len(self.dom.getElementsByTagName("image")) > 0:
                result_dict["image"] = getText(
                    self.dom.getElementsByTagName("image")[0].getElementsByTagName("url")[0].childNodes
                )

            if len(self.dom.getElementsByTagName("generator")) > 0:
                result_dict["generator"] = getText(self.dom.getElementsByTagName("generator")[0].childNodes)

            # Platform-specific feeds are delegated to dedicated parsers.
            # NOTE(review): "wordpress" is matched case-sensitively while
            # "blogger" is lower-cased first -- confirm this is intended.
            if result_dict["generator"].find("wordpress") >= 0:
                return self.parseWordPress(contents)
            if result_dict["generator"].lower().find("blogger") >= 0:
                return self.parseBlogspot(contents)

            if len(self.dom.getElementsByTagName("language")) > 0:
                result_dict["language"] = getText(self.dom.getElementsByTagName("language")[0].childNodes)
            if len(self.dom.getElementsByTagName("description")) > 0:
                result_dict["description"] = getText(self.dom.getElementsByTagName("description")[0].childNodes)
            try:

                # Writer lookup order: <managingEditor>, then <webMaster>,
                # then the <name> inside the first <author>.
                if len(self.dom.getElementsByTagName("managingEditor")) > 0:
                    result_dict["writer"] = getText(self.dom.getElementsByTagName("managingEditor")[0].childNodes)
                elif len(self.dom.getElementsByTagName("webMaster")) > 0:
                    result_dict["writer"] = getText(self.dom.getElementsByTagName("webMaster")[0].childNodes)
                else:
                    tt_list = self.dom.getElementsByTagName("author")

                    try:
                        if len(tt_list) > 0:
                            writer = getText(tt_list[0].getElementsByTagName("name")[0].childNodes)
                            if writer != "":
                                result_dict["writer"] = writer
                                # An author's <gd:image src="..."> overrides the channel image.
                                tt_node = self.dom.getElementsByTagName("author")[0].getElementsByTagName("gd:image")[0]
                                image = tt_node.attributes["src"].value.encode("utf8")
                                if image != "":
                                    result_dict["image"] = image
                    except Exception, msg:
                        # Author details are optional; missing elements are ignored silently.
                        pass
            except Exception, msg:
                getLogger().error(msg)

        except Exception, msg:
            getLogger().error(msg)
コード例 #7
0
ファイル: feedFinder.py プロジェクト: Shiwoo-Park/glance
def getTistoryId(url):
	downLoader = Downloader()
	attr_dict = dict()
	attr_dict["tid"] = "livere_blogurl = '****.tistory.com';"
	attr_dict["tid2"] = """__addParam("author","****");"""
	try:
		(t_url, header, html) = downLoader.open(url) 
		print "download", url
	except Exception, msg:
		getLogger().error("feed download error : %s %s", msg, rss)
		return None
コード例 #8
0
ファイル: db_util.py プロジェクト: Shiwoo-Park/crawlpy
def getDBConnection(host, usr, pwd, db, cursor_type="normal"):
	"""Open a MySQL connection and a cursor on it.

	``cursor_type="dict"`` selects ``MySQLdb.cursors.DictCursor``; any other
	value uses the driver's default cursor. The connection character set is
	switched to utf8 before the cursor is created.

	On failure the error is logged and ``db_connect`` / ``db_cursor`` keep
	whatever value they had reached (None if connecting itself failed).

	NOTE(review): no ``return`` is visible in this excerpt -- presumably the
	connection and cursor are returned afterwards; confirm.
	"""
	db_connect = None
	db_cursor = None
	try:
		if cursor_type == "dict":
			db_connect = MySQLdb.connect(host, usr, pwd, db, cursorclass=MySQLdb.cursors.DictCursor)
		else:
			db_connect = MySQLdb.connect(host, usr, pwd, db)
		db_connect.set_character_set('utf8')
		db_cursor = db_connect.cursor()
	except Exception, msg:
		# The message names getDBCursor(), not this function.
		getLogger().error("getDBCursor() Failed : %s"%msg)
コード例 #9
0
ファイル: db_util.py プロジェクト: HDNua/crawlpy
def getDBConnection(host, usr, pwd, db, cursor_type="normal"):
    """Open a MySQL connection and a cursor on it.

    ``cursor_type="dict"`` selects ``MySQLdb.cursors.DictCursor``; any other
    value uses the driver's default cursor. The connection character set is
    switched to utf8 before the cursor is created.

    On failure the error is logged and ``db_connect`` / ``db_cursor`` keep
    whatever value they had reached (None if connecting itself failed).

    NOTE(review): no ``return`` is visible in this excerpt -- presumably the
    connection and cursor are returned afterwards; confirm.
    """
    db_connect = None
    db_cursor = None
    try:
        if cursor_type == "dict":
            db_connect = MySQLdb.connect(
                host, usr, pwd, db, cursorclass=MySQLdb.cursors.DictCursor)
        else:
            db_connect = MySQLdb.connect(host, usr, pwd, db)
        db_connect.set_character_set('utf8')
        db_cursor = db_connect.cursor()
    except Exception, msg:
        # The message names getDBCursor(), not this function.
        getLogger().error("getDBCursor() Failed : %s" % msg)
コード例 #10
0
ファイル: data_producer.py プロジェクト: Shiwoo-Park/glance
def makeOutputDict(document_data):
    """Build the output dictionary for one crawled document.

    Starts from ``document_data.parsing_result`` (or an empty dict when that
    is falsy), copies the document's mode/type/guid/crawl time and links into
    it, and then adds type-specific fields for "NEWS" and "BBS" documents.

    NOTE(review): the excerpt stops inside the ``try`` block; the handler and
    the return value are not visible here.
    """

    try:

        # The parsing result itself is extended in place, not copied.
        if document_data.parsing_result:
            result_dict = document_data.parsing_result
        else:
            result_dict = dict()

        result_dict["mode"] = document_data.mode
        result_dict["type"] = document_data.type
        result_dict["guid"] = document_data.guid
        result_dict["crawlTime"] = document_data.crawl_time

        # NOTE(review): replace("]", "]") substitutes a string with itself and
        # therefore changes nothing -- the literals look garbled (possibly an
        # escaping of a CDATA terminator was intended); verify against the
        # original source before relying on it.
        if "body" in result_dict:
            result_dict["body"] = result_dict["body"].replace("]", "]")
        if "bodyHtml" in result_dict:
            result_dict["bodyHtml"] = result_dict["bodyHtml"].replace("]", "]")
        if "title" in result_dict:
            # split()/join collapses every run of whitespace into one space.
            result_dict["title"] = " ".join(result_dict["title"].replace("]", "]").split())

        result_dict["webLink"] = document_data.down_url
        result_dict["mobileLink"] = document_data.mobile_url

        if document_data.type == "NEWS":
            result_dict["channelName"] = document_data.domain_data.name
            result_dict["channelIdentifier"] = document_data.domain_data.url
            result_dict["sourceType"] = 4
            try:
                # Thumbnails are optional; only the keys that exist are copied.
                image_data = document_data.image_data
                if "78x78" in image_data:
                    result_dict["imageThumbnail78x78"] = image_data["78x78"]
                if "126x126" in image_data:
                    result_dict["imageThumbnail126x126"] = image_data["126x126"]
                if "signature" in image_data:
                    result_dict["imageThumbnailSignature"] = image_data["signature"]
            except Exception, msg:
                getLogger().error(msg)
        elif document_data.type == "BBS":
            # Counters missing from the parsing result default to 0.
            for int_field in ["readCount", "replyCount", "recommendCount", "videoCount", "imageCount"]:
                if int_field not in result_dict:
                    result_dict[int_field] = 0
            result_dict["siteName"] = document_data.domain_data.name
            result_dict["siteIdentifier"] = document_data.domain_data.url
            # One "link<TAB>text" line per entry of bodyLinks; a missing
            # "bodyLinks" key raises KeyError here.
            outLinks = list()
            for link in result_dict["bodyLinks"]:
                l_data = result_dict["bodyLinks"][link]
                outLinks.append("%s\t%s" % (link, l_data.text))
            result_dict["outLinks"] = "\n".join(outLinks)
コード例 #11
0
ファイル: kafkaClient.py プロジェクト: Shiwoo-Park/glance
    def sendData(self, data, sc="NEWS"):
        """Publish ``data`` to the ``sc`` topic through ``self.producer``.

        Returns:
            "NO SERVICE" when ``sc`` is anything other than "NEWS" (nothing is
            sent), "OK" when the text form of the producer response contains
            "error=0", and "ERROR" otherwise -- including when sending raises.
            Failures are logged through ``getLogger().error``.
        """
        # Only the NEWS topic is served; reject everything else up front.
        if sc != "NEWS":
            return "NO SERVICE"

        try:
            res = self.producer.send_messages(sc, data)
            response_text = str(res)
            if "error=0" in response_text:
                return "OK"
            getLogger().error(response_text)
            return "ERROR"
        except Exception as msg:
            # "as" works on Python 2.6+ and Python 3, unlike "except X, e".
            getLogger().error(msg)
            return "ERROR"
コード例 #12
0
ファイル: xml_producer.py プロジェクト: Shiwoo-Park/crawlpy
	def __init__(self, _outputDirPath=os.getcwd(), _documentCountLimit=1000):
		"""Store the output settings, then prepare the output directory and field lists.

		NOTE(review): the ``os.getcwd()`` default is evaluated once, when the
		method is defined, not on each call -- a later ``chdir`` is not picked up.
		"""
		self.fieldListDic = dict()
		self.outputDirPath = _outputDirPath
		self.documentCountLimit = _documentCountLimit
		self.logger = getLogger()
		self.setupOutputDir()
		self.initSCFieldListDic()
コード例 #13
0
ファイル: versions.py プロジェクト: gsingh123/Ultros
    def __init__(self):
        """Record the current version, then load release data and emit warnings."""
        self.log = getLogger("Updates")
        # ``current`` comes from the enclosing module; it is kept both raw and
        # parsed as a StrictVersion.
        self.current = current
        self.current_v = StrictVersion(current)

        self.load_release()
        self.do_warnings()
コード例 #14
0
 def __init__(self, _outputDirPath=os.getcwd(), _documentCountLimit=1000):
     """Store the output settings, then prepare the output directory and field lists.

     NOTE(review): the ``os.getcwd()`` default is evaluated once, when the
     method is defined, not on each call -- a later ``chdir`` is not picked up.
     """
     self.fieldListDic = dict()
     self.outputDirPath = _outputDirPath
     self.documentCountLimit = _documentCountLimit
     self.logger = getLogger()
     self.setupOutputDir()
     self.initSCFieldListDic()
コード例 #15
0
	def _getRules(self, url, verbose=False):
		"""
		Return the parsed robots.txt rules for the site that ``url`` belongs to.

		Lookup order:
		  1. the in-process cache ``self.robots``, keyed by the site-level
		     robots.txt URL
		  2. download and parse through ``self._parsingRobotsFile``; a
		     successful result is stored in the cache

		Returns None when downloading/parsing raises. With ``verbose`` set,
		cache hits and downloads are logged at info level.
		"""
		logger = log.getLogger()

		# 1. use stored robots dictionary cache
		robots_site_path = urlparse.urljoin(url, "/robots.txt")	# site-level robots.txt
		if robots_site_path in self.robots:
			if verbose:
				logger.info("robotstxt in local memory: %s", robots_site_path)
			return self.robots[robots_site_path]

		# 2. download robots text
		rules = None
		try:
			rules = self._parsingRobotsFile(robots_site_path)
			if verbose:
				logger.info("robotstxt downloaded: %s: %s", rules.return_code, robots_site_path)
			self.robots[robots_site_path] = rules
		except Exception as msg:
			# Still best effort, but no longer silent, and KeyboardInterrupt /
			# SystemExit are no longer swallowed as they were by a bare except.
			logger.error("robotstxt lookup failed: %s: %s", robots_site_path, msg)

		return rules
コード例 #16
0
ファイル: image_util.py プロジェクト: Shiwoo-Park/glance
def isOldImage(chk_api, hash_key):
	"""Query ``chk_api`` with curl and look for ``hash_key`` in the response.

	Returns True (after logging "same hash") as soon as an output line
	contains the upper-cased ``hash_key``. A line containing "404" stops the
	scan and clears ``exist``. Exceptions are logged.

	NOTE(review): ``exist`` is not read in the visible part and no final
	``return`` is shown -- the excerpt presumably continues; confirm.
	"""

	try:
		# NOTE(review): chk_api is interpolated into a shell command line
		# (shell=True); confirm it can never carry untrusted text.
		cmd = "curl %s --connect-timeout 5 --max-time 10 "%chk_api
		exist = True
		fd = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		for line in fd.stdout.readlines():
			results = line.strip()
			if results.find("404") >= 0:
				exist = False
				break
			if results.find(hash_key.upper()) >= 0:
				getLogger().info("same hash")
				return True
	except Exception, msg:
		getLogger().error(msg)
コード例 #17
0
ファイル: custom_parser.py プロジェクト: Shiwoo-Park/glance
	def parse(self, header, html, url, parser_id=None):
		"""Parse a page and, when ``parser_id`` names a known rule, apply that rule.

		The rules are loaded lazily through ``setRules()`` while ``self.prm``
		is still None. The generic parse result from ``self.parser.plugParser``
		is passed to ``getDataByRule`` together with the rule found in
		``self.prm.id_dict``; the resulting dict is tagged with "parser_id"
		and returned. Exceptions on that path are logged.

		NOTE(review): what happens when ``parser_id`` is None/unknown or the
		rule fails is outside this excerpt.
		"""

		if self.prm == None:
			self.setRules()

		ret_dict = self.parser.plugParser(header,  html, url)

		result_dict = dict()
		if parser_id != None and parser_id in self.prm.id_dict:
			try:
				host_rule = self.prm.id_dict[parser_id]
				result_dict = self.getDataByRule(host_rule, ret_dict, url)
				result_dict["parser_id"] = parser_id
				return result_dict
			except Exception, msg:
				getLogger().error(msg)
コード例 #18
0
ファイル: image_util.py プロジェクト: Shiwoo-Park/glance
def uploadImage(upload_url, file):
	"""Upload ``file`` to ``upload_url + "/fileext/jpg"`` with curl, up to 3 attempts.

	Returns "OK" as soon as an output line starts with "OK"; the elapsed time
	since the function started is logged at that point. Exceptions are logged
	and count as a failed attempt.

	NOTE(review): what is returned after three failed attempts is outside
	this excerpt.
	"""
	retry = 0
	results = ""
	m_t = time.time()
	while retry < 3:
		try:
			# NOTE(review): both arguments are interpolated into a shell
			# command line (shell=True); confirm they are never untrusted.
			cmd = "curl --upload-file %s %s --connect-timeout 5 --max-time 10 --header 'Expect:' "%(file, upload_url+"/fileext/jpg")
			fd = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			for line in fd.stdout.readlines():
				results = line.strip()
				if results.startswith("OK"):
					e_t = time.time()
					getLogger().info("%s upload time :  %s", upload_url, e_t - m_t)
					return "OK"
		except Exception, msg:
			getLogger().error(msg)
		retry += 1
コード例 #19
0
ファイル: image_util.py プロジェクト: Shiwoo-Park/glance
def downloadImage(img_url):
	"""Fetch ``img_url`` and return the response body, retrying once.

	After a failed first attempt the error is logged, the function waits one
	second and tries again. If the second attempt fails as well, the error is
	logged and its text (``str(error)``) is returned instead of image data.
	"""
	request = mechanize.Request(img_url)

	def fetch(read_headers):
		# One download attempt; only the first attempt also reads the headers.
		response = mechanize.urlopen(request)
		body = response.read()
		if read_headers:
			response.info()
		return body

	try:
		return fetch(True)
	except Exception as first_error:
		try:
			getLogger().error("%s %s", img_url, first_error)
			time.sleep(1)
			return fetch(False)
		except Exception as second_error:
			getLogger().error("%s %s", img_url, second_error)
			return str(second_error)
コード例 #20
0
ファイル: manager.py プロジェクト: NotAFile/Ultros
 def __init__(self):
     """Create the "Help" logger and register the two built-in generic topics."""
     self.logger = getLogger("Help")
     # Built-in topics: the alias list and the command list.
     self.add_topic(
         AliasListTopic("aliases", HelpTopicType.GenericTopic)
     )
     self.add_topic(
         ComandListTopic("commands", HelpTopicType.GenericTopic)
     )
コード例 #21
0
ファイル: config.py プロジェクト: NotAFile/Ultros
    def __init__(self, filename):
        """Set up the config object for ``filename`` and load it.

        ``filename`` is sanitized before use: leading/trailing "." characters
        are trimmed and every ".." path component is removed. ``self.exists``
        stores the result of the initial ``self.reload(False)``.
        """
        self.callbacks = []

        self.logger = getLogger("YamlConfig")
        # Some sanitizing here to make sure people can't escape the config dirs.
        # str.strip("..") only trims "." characters at both ends, so ".."
        # components inside the path survive it; drop those explicitly too.
        # NOTE(review): only "/" is treated as a separator here.
        filename = filename.strip("..")
        filename = "/".join(
            part for part in filename.split("/") if part != ".."
        )
        self.filename = filename
        self.exists = self.reload(False)
コード例 #22
0
    def __init__(self, filename):
        """Set up the config object for ``filename`` and load it.

        ``filename`` is sanitized before use: leading/trailing "." characters
        are trimmed and every ".." path component is removed. ``self.exists``
        stores the result of the initial ``self.reload(False)``.
        """
        self.callbacks = []

        self.logger = getLogger("YamlConfig")
        # Some sanitizing here to make sure people can't escape the config dirs.
        # str.strip("..") only trims "." characters at both ends, so ".."
        # components inside the path survive it; drop those explicitly too.
        # NOTE(review): only "/" is treated as a separator here.
        filename = filename.strip("..")
        filename = "/".join(
            part for part in filename.split("/") if part != ".."
        )
        self.filename = filename
        self.exists = self.reload(False)
コード例 #23
0
ファイル: manager.py プロジェクト: NotAFile/Ultros
    def __init__(self, factory_manager=None,
                 path="./plugins", module="plugins"):
        """Store the factory manager and the plugin location.

        Raises:
            ValueError: when ``factory_manager`` is None. The parameter has a
                None default but is effectively required.
        """
        if factory_manager is None:
            raise ValueError("Factory manager cannot be None!")

        self.log = getLogger("Plugins")

        self.factory_manager = factory_manager

        self.module = module
        self.path = path
コード例 #24
0
ファイル: urlpattern.py プロジェクト: Shiwoo-Park/glance
def makeUrlFromPattern(patterns, ret_key_dict):
	"""Fill the first usable URL pattern with the values of ``ret_key_dict``.

	Each pattern contains placeholders of the form ``(KEY)``. For every key,
	the placeholder is replaced by its value; an empty "ANYPATH" value instead
	collapses "/(ANYPATH)/" to "/". The first pattern left without any
	parenthesis is quoted (``quote`` with ``safe=RESERVED``), post-processed
	and returned. Exceptions are logged.

	Nothing is returned explicitly in the visible part when no pattern
	resolves.
	"""
	try:
		for ret_url in patterns:
			anypath_str = ""
			for kk in ret_key_dict:
				# Integer-typed keys must hold an integer value.
				# NOTE(review): break only leaves the key loop; the pattern is
				# still checked below with the keys substituted so far --
				# confirm that skipping the whole pattern was not intended.
				if (kk in INT_KEY or kk.startswith("INT_")) and  not isInt(ret_key_dict[kk]) :
					break
				if kk == "ANYPATH" and ret_key_dict[kk].strip() == "":
					ret_url = ret_url.replace("/(ANYPATH)/", "/")
				else:
					ret_url = ret_url.replace("("+kk+")", ret_key_dict[kk])
					if kk == "ANYPATH":
						# Remembered so it can be removed again from the final URL.
						anypath_str = "/"+ret_key_dict[kk]

			# Usable only when every placeholder has been substituted.
			if ret_url.count("(") == 0 and ret_url.count(")") == 0:
				ret_url = quote(ret_url.strip(), safe=RESERVED)
				ret_url = ret_url.replace("://m.www.","://m.").replace(anypath_str, "")
				return ret_url
	except Exception, msg:
		getLogger().error(msg)
コード例 #25
0
ファイル: data_producer.py プロジェクト: Shiwoo-Park/glance
    def writeEachDocumentData(self, f, dataDic, type):
        """Write one document's fields to the open file object ``f``.

        With ``type == "test"`` a single tab-separated line of guid, title and
        bodyHtml is written. Otherwise each field is written as an XML element
        with a CDATA body: when ``dataDic["type"]`` has an entry in
        ``self.fieldListDic`` only those fields (and only truthy values) are
        written, in that order; otherwise every key of ``dataDic`` is written.

        Any exception is logged together with ``dataDic``.
        """

        try:
            if type == "test":
                f.write("%s\t%s\t%s\n" % (dataDic["guid"], dataDic["title"], dataDic["bodyHtml"]))
            else:
                keyList = None
                if "type" in dataDic:
                    scType = dataDic["type"]
                    if scType in self.fieldListDic:
                        keyList = self.fieldListDic[scType]

                if keyList:  # field names specified
                    for field in keyList:
                        if field in dataDic and dataDic[field] and dataDic[field] != "":
                            f.write("		<%s><![CDATA[%s]]></%s>\n" % (field, dataDic[field], field))
                else:  # field names not specified (output everything)
                    for key, val in dataDic.items():
                        f.write("		<%s><![CDATA[%s]]></%s>\n" % (key, val, key))
        except Exception, msg:
            getLogger().error("%s %s FILE WRITE ERROR", msg, dataDic)
コード例 #26
0
ファイル: protocol.py プロジェクト: domainr/Ultros-contrib
    def __init__(self, name, factory, config):
        """Initialise the base protocol, then start the TCP connection.

        Host and port are read from ``self.config["connection"]``; the
        connection is made through ``self.factory``.
        """
        NoChannelsProtocol.__init__(self, name, factory, config)

        self.log = getLogger(self.name)
        self.event_manager = EventManager()
        self.command_manager = CommandManager()

        # The last positional argument (120) is the connect timeout in seconds.
        reactor.connectTCP(
            self.config["connection"]["host"],
            self.config["connection"]["port"],
            self.factory,
            120
        )
コード例 #27
0
    def __init__(self,
                 factory_manager=None,
                 path="./plugins",
                 module="plugins"):
        """Store the factory manager and the plugin location.

        Raises:
            ValueError: when ``factory_manager`` is None. The parameter has a
                None default but is effectively required.
        """
        if factory_manager is None:
            raise ValueError("Factory manager cannot be None!")

        self.log = getLogger("Plugins")

        self.factory_manager = factory_manager

        self.module = module
        self.path = path
コード例 #28
0
ファイル: psw_url_parser.py プロジェクト: HDNua/crawlpy
	def __init__(self, init_dic):
		"""Validate ``init_dic`` and set up the pattern dictionaries.

		When ``self.isValidInfo(init_dic)`` is false an error is logged and the
		process is terminated with exit status 1.
		"""
		self.info_dic = dict()
		self.logger = getLogger()
		if self.isValidInfo(init_dic):
			self.info_dic = init_dic
		else:
			self.logger.error("Invalid init information")
			# NOTE(review): exits the whole process from a constructor.
			exit(1)
		self.url_parser = URLParser()  # the most basic form of URL parser

		self.pattern_dic = {"normal":dict(), "host_key":dict()}  # dictionary that will hold all pattern information
		# dic["normal" | "hostkey"][domain] = [(priority, URLData()), ...]

		self.build_pt_dic = dict()  # the extracted reserved words can be used to build various other desired URLs
コード例 #29
0
ファイル: factory.py プロジェクト: gsingh123/Ultros
 def __init__(self, protocol_name, config, manager):
     """Store the protocol settings and the manager's reconnection policy.

     The protocol type is read from ``config["main"]["protocol-type"]``; the
     reconnection settings come from ``manager.main_config["reconnections"]``.
     A missing key raises KeyError.
     """
     self.logger = getLogger("F: %s" % protocol_name)
     self.config = config
     self.manager = manager
     self.name = protocol_name
     self.ptype = config["main"]["protocol-type"]
     # Both start unset in this constructor.
     self.protocol_class = None
     self.protocol = None
     manager_config = manager.main_config
     reconnections = manager_config["reconnections"]
     # delay and attempts are coerced to int; the other values are stored as given.
     self.r_delay = int(reconnections["delay"])
     self.r_attempts = int(reconnections["attempts"])
     self.r_on_drop = reconnections["on-drop"]
     self.r_on_failure = reconnections["on-failure"]
     self.r_reset = reconnections["reset-on-success"]
コード例 #30
0
ファイル: factory.py プロジェクト: NotAFile/Ultros
 def __init__(self, protocol_name, config, manager):
     """Store the protocol settings and the manager's reconnection policy.

     The protocol type is read from ``config["main"]["protocol-type"]``; the
     reconnection settings come from ``manager.main_config["reconnections"]``.
     A missing key raises KeyError.
     """
     self.logger = getLogger("F: %s" % protocol_name)
     self.config = config
     self.manager = manager
     self.name = protocol_name
     self.ptype = config["main"]["protocol-type"]
     # Both start unset in this constructor.
     self.protocol_class = None
     self.protocol = None
     manager_config = manager.main_config
     reconnections = manager_config["reconnections"]
     # delay and attempts are coerced to int; the other values are stored as given.
     self.r_delay = int(reconnections["delay"])
     self.r_attempts = int(reconnections["attempts"])
     self.r_on_drop = reconnections["on-drop"]
     self.r_on_failure = reconnections["on-failure"]
     self.r_reset = reconnections["reset-on-success"]
コード例 #31
0
ファイル: data.py プロジェクト: NotAFile/Ultros
    def __init__(self, filename):
        """Set up the data file object for ``filename`` and load it.

        ``filename`` is sanitized (leading/trailing "." characters trimmed,
        ".." path components removed), its parent directory is created when
        missing, and the data is loaded with ``self.reload(False)``.
        """
        self.callbacks = []

        self.logger = getLogger("Data")
        # str.strip("..") only trims "." characters at both ends, so ".."
        # components inside the path survive it; drop those explicitly too.
        # NOTE(review): only "/" is treated as a separator here.
        filename = filename.strip("..")
        filename = "/".join(
            part for part in filename.split("/") if part != ".."
        )

        # Everything before the last "/" is the directory part.
        folders = "/".join(filename.split("/")[:-1])

        # A bare file name has no directory part, and os.makedirs("") raises.
        if folders and not os.path.exists(folders):
            os.makedirs(folders)

        self.filename = filename
        self.reload(False)
コード例 #32
0
    def __init__(self, filename):
        """Set up the data file object for ``filename`` and load it.

        ``filename`` is sanitized (leading/trailing "." characters trimmed,
        ".." path components removed), its parent directory is created when
        missing, and the data is loaded with ``self.reload(False)``.
        """
        self.callbacks = []

        self.logger = getLogger("Data")
        # str.strip("..") only trims "." characters at both ends, so ".."
        # components inside the path survive it; drop those explicitly too.
        # NOTE(review): only "/" is treated as a separator here.
        filename = filename.strip("..")
        filename = "/".join(
            part for part in filename.split("/") if part != ".."
        )

        # Everything before the last "/" is the directory part.
        folders = "/".join(filename.split("/")[:-1])

        # A bare file name has no directory part, and os.makedirs("") raises.
        if folders and not os.path.exists(folders):
            os.makedirs(folders)

        self.filename = filename
        self.reload(False)
コード例 #33
0
ファイル: data.py プロジェクト: NotAFile/Ultros
    def __init__(self, path, *args, **kwargs):
        """Store the connection parameters and connect via ``self.reconnect()``.

        ``url`` is taken from the keyword arguments (None when absent); all
        positional and keyword arguments are kept unchanged on the instance.
        """
        self.callbacks = []

        self.logger = getLogger("Redis")

        self.path = path
        self.url = kwargs.get("url", None)

        # Empty args/kwargs are shown as "[]" / "{}" in the trace output.
        self.logger.trace("Path: %s" % path)
        self.logger.trace("Args: %s" % (args or "[]"))
        self.logger.trace("KWArgs: %s" % (kwargs or "{}"))

        self.args = args
        self.kwargs = kwargs

        self.reconnect()
コード例 #34
0
	def isDisallowSite(self, url, verbose=False):
		"""
		Return ``(blocked, delay)`` for the site that ``url`` belongs to.

		``blocked`` is True if robots.txt contains the following text:
			User-agent: * or zumbot
			Disallow: /

		``self.delay`` is reset to 3 and ``self.blocked`` to False before the
		site-level robots.txt is handed to ``self._parsingRobotsFile``; the
		values held by those attributes afterwards are what gets returned.
		``verbose`` is accepted but not used.
		"""
		log.getLogger()
		self.delay = 3
		site_robots_url = urlparse.urljoin(url, "/robots.txt")
		self.blocked = False
		# Download and parse robots.txt; the parsed rules themselves are not needed here.
		self._parsingRobotsFile(site_robots_url)
		return bool(self.blocked), self.delay
コード例 #35
0
    def __init__(self, path, *args, **kwargs):
        """Store the connection parameters and connect via ``self.reconnect()``.

        ``url`` is taken from the keyword arguments (None when absent); all
        positional and keyword arguments are kept unchanged on the instance.
        """
        self.callbacks = []

        self.logger = getLogger("Redis")

        self.path = path
        self.url = kwargs.get("url", None)

        # Empty args/kwargs are shown as "[]" / "{}" in the trace output.
        self.logger.trace("Path: %s" % path)
        self.logger.trace("Args: %s" % (args or "[]"))
        self.logger.trace("KWArgs: %s" % (kwargs or "{}"))

        self.args = args
        self.kwargs = kwargs

        self.reconnect()
コード例 #36
0
ファイル: console.py プロジェクト: gsingh123/Ultros
    def __init__(self):
        """Replace ``sys.stdout`` / ``sys.stderr`` with console wrappers.

        When stdout is not a TTY or "--no-console" is on the command line,
        nothing is wrapped: only ``self.wrapped = False`` is set and none of
        the other attributes below exist on the instance.
        """
        if not sys.stdout.isatty() or "--no-console" in sys.argv:
            self.wrapped = False
            return

        self.logger = getLogger("Console")

        # Keep the original streams so they stay reachable after the swap.
        self.old_stdout = sys.stdout
        self.old_stderr = sys.stderr

        self.wrapper = Wrapper(self)
        self.wrapper_err = WrapperErr(self)
        self.reader = Reader(self)

        # We set it here
        sys.stdout = self.wrapper
        sys.stderr = self.wrapper_err
コード例 #37
0
ファイル: console.py プロジェクト: gsingh123/Ultros
    def __init__(self):
        """Replace ``sys.stdout`` / ``sys.stderr`` with console wrappers.

        When stdout is not a TTY or "--no-console" is on the command line,
        nothing is wrapped: only ``self.wrapped = False`` is set and none of
        the other attributes below exist on the instance.
        """
        if not sys.stdout.isatty() or "--no-console" in sys.argv:
            self.wrapped = False
            return

        self.logger = getLogger("Console")

        # Keep the original streams so they stay reachable after the swap.
        self.old_stdout = sys.stdout
        self.old_stderr = sys.stderr

        self.wrapper = Wrapper(self)
        self.wrapper_err = WrapperErr(self)
        self.reader = Reader(self)

        # We set it here
        sys.stdout = self.wrapper
        sys.stderr = self.wrapper_err
コード例 #38
0
ファイル: protocol.py プロジェクト: gsingh123/Ultros
    def __init__(self, name, factory, config):
        """Read the protocol configuration and start the SSL connection.

        Fires the "PreConnect" callback before connecting and "PostConnect"
        after ``reactor.connectSSL`` has been called. When no client context
        can be created, the protocol is removed from the manager and
        construction stops before connecting.
        """
        self.name = name
        self.factory = factory
        self.config = config

        self.received = ""
        self.log = getLogger(self.name)
        self.log.info("Setting up..")

        self.command_manager = CommandManager()
        self.event_manager = EventManager()

        # Required settings: a missing key raises KeyError.
        self.username = config["identity"]["username"]
        self.password = config["identity"]["password"]
        self.networking = config["network"]
        self.tokens = config["identity"]["tokens"]

        self.control_chars = config["control_chars"]

        # The "audio" section is optional; both flags default to True.
        audio_conf = config.get("audio", {})
        self.should_mute_self = audio_conf.get("should_mute_self", True)
        self.should_deafen_self = audio_conf.get("should_deafen_self", True)

        event = general_events.PreConnectEvent(self, config)
        self.event_manager.run_callback("PreConnect", event)

        context = self._get_client_context()
        if context is None:
            # Could not create a context (problem loading cert file)
            self.factory.manager.remove_protocol(self.name)
            return

        # The last positional argument (120) is the connect timeout in seconds.
        reactor.connectSSL(
            self.networking["address"],
            self.networking["port"],
            self.factory,
            context,
            120
        )

        event = general_events.PostConnectEvent(self, config)
        self.event_manager.run_callback("PostConnect", event)
コード例 #39
0
ファイル: protocol.py プロジェクト: domainr/Ultros-contrib
    def __init__(self, factory, config):
        """Read the server/identity settings and start the TCP connection.

        A missing configuration key raises KeyError.
        """
        self.factory = factory
        self.config = config
        self.log = getLogger("TS3")

        self.log.info("Setting up..")

        self.server = config["server"]
        self.identity = config["identity"]

        self.user = self.identity["username"]
        self.passw = self.identity["password"]
        self.sid = self.server["sid"]

        # The last positional argument (120) is the connect timeout in seconds.
        reactor.connectTCP(
            self.server["address"],
            self.server["port"],
            self.factory,
            120
        )
コード例 #40
0
ファイル: data.py プロジェクト: NotAFile/Ultros
    def __init__(self, path, *args, **kwargs):
        """Parse the module name out of ``path`` and connect via ``self.reconnect()``.

        ``path`` has "//" collapsed to "/" and everything up to and including
        the first "/" removed; the text before the first ":" of the remainder
        is stored as ``self.parsed_module``.

        Raises:
            IndexError: when ``path`` contains no "/" at all.
        """
        self.callbacks = []

        self.logger = getLogger("DBAPI")

        path = path.replace("//", "/")
        path = path.split("/", 1)[1]

        self.path = path

        # Empty args/kwargs are shown as "[]" / "{}" in the trace output.
        self.logger.trace("Path: %s" % path)
        self.logger.trace("Args: %s" % (args or "[]"))
        self.logger.trace("KWArgs: %s" % (kwargs or "{}"))

        parsed_module = path.split(":", 1)[0]
        self.parsed_module = parsed_module
        self.args = args
        self.kwargs = kwargs

        self.logger.debug(_("Parsed module: %s") % parsed_module)

        self.reconnect()
コード例 #41
0
    def __init__(self, path, *args, **kwargs):
        """Parse the module name out of ``path`` and connect via ``self.reconnect()``.

        ``path`` has "//" collapsed to "/" and everything up to and including
        the first "/" removed; the text before the first ":" of the remainder
        is stored as ``self.parsed_module``.

        Raises:
            IndexError: when ``path`` contains no "/" at all.
        """
        self.callbacks = []

        self.logger = getLogger("DBAPI")

        path = path.replace("//", "/")
        path = path.split("/", 1)[1]

        self.path = path

        # Empty args/kwargs are shown as "[]" / "{}" in the trace output.
        self.logger.trace("Path: %s" % path)
        self.logger.trace("Args: %s" % (args or "[]"))
        self.logger.trace("KWArgs: %s" % (kwargs or "{}"))

        parsed_module = path.split(":", 1)[0]
        self.parsed_module = parsed_module
        self.args = args
        self.kwargs = kwargs

        self.logger.debug(_("Parsed module: %s") % parsed_module)

        self.reconnect()
コード例 #42
0
ファイル: translations.py プロジェクト: gsingh123/Ultros
    def set_language(self, lang=None, mlang=None):
        """Select the two active languages and reload.

        ``lang`` is stored as ``self.language`` and ``mlang`` as
        ``self.m_language``. A value of None, or one that is not in
        ``self.known``, is replaced by ``DEFAULT``; unknown values are also
        reported, through the logger when one is available and on stdout
        otherwise. The logger is created on first use when ``self.log`` is set.
        """
        lang = DEFAULT if lang is None else lang
        mlang = DEFAULT if mlang is None else mlang

        self.get_known()

        if self.log and self.logger is None:
            from utils.log import getLogger

            self.logger = getLogger("Translations")

        def known_or_default(code):
            # Pass known codes through; report and replace unknown ones.
            if code in self.known:
                return code
            message = "Unknown language '%s', defaulting to '%s'" % (
                code, DEFAULT)
            if self.logger is None:
                print(message)
            else:
                self.logger.warn(message)
            return DEFAULT

        lang = known_or_default(lang)
        mlang = known_or_default(mlang)

        self.language = lang
        self.m_language = mlang
        self.reload()
コード例 #43
0
ファイル: translations.py プロジェクト: NotAFile/Ultros
    def set_language(self, lang=None, mlang=None):
        """Select the two active languages and reload.

        ``lang`` is stored as ``self.language`` and ``mlang`` as
        ``self.m_language``. A value of None, or one that is not in
        ``self.known``, is replaced by ``DEFAULT``; unknown values are also
        reported, through the logger when one is available and on stdout
        otherwise. The logger is created on first use when ``self.log`` is set.
        """
        lang = DEFAULT if lang is None else lang
        mlang = DEFAULT if mlang is None else mlang

        self.get_known()

        if self.log and self.logger is None:
            from utils.log import getLogger

            self.logger = getLogger("Translations")

        def known_or_default(code):
            # Pass known codes through; report and replace unknown ones.
            if code in self.known:
                return code
            message = "Unknown language '%s', defaulting to '%s'" % (
                code, DEFAULT)
            if self.logger is None:
                print(message)
            else:
                self.logger.warn(message)
            return DEFAULT

        lang = known_or_default(lang)
        mlang = known_or_default(mlang)

        self.language = lang
        self.m_language = mlang
        self.reload()
コード例 #44
0
ファイル: test_permissions.py プロジェクト: gsingh123/Ultros
    def __init__(self):
        """Build a permissions handler backed by files under ``tmpdir``.

        Creates the config and data directories (existing ones are reused),
        writes a minimal ``settings.yml``, and wires a StorageManager, the
        "permissions.yml" data file and the permissions handler together.
        """
        self.logger = getLogger("Permissions")

        self.confdir = tmpdir + "/config/"
        self.datadir = tmpdir + "/data/"

        # Create each directory on its own: previously a failure on the first
        # one (for example because it already existed) skipped the second.
        created = 0
        for directory in (self.confdir, self.datadir):
            try:
                os.makedirs(directory)
                created += 1
            except OSError:
                # Best effort, as before; typically the directory already exists.
                pass
        if created == 2:
            self.logger.debug("Config and data dirs created.")

        # Close the settings file deterministically instead of leaking the handle.
        with open(self.confdir + "settings.yml", "w") as settings_file:
            yaml.dump({"editor_warning": False}, settings_file)

        self.storage = StorageManager(self.confdir, self.datadir)

        self.data = self.storage.get_file(self, "data", formats.YAML,
                                          "permissions.yml")

        self.handler = permissionsHandler(self, self.data)

        # NOTE(review): super() is anchored at PluginObject, which skips
        # PluginObject.__init__ itself if this class derives from it -- confirm.
        super(PluginObject, self).__init__()
コード例 #45
0
ファイル: rpc.py プロジェクト: LuminousXLB/CrawlerTY
import logging
import os
import pickle
from pprint import pprint

import numpy as np
import pandas as pd
import zerorpc
from sklearn.linear_model import SGDClassifier
from sqlalchemy import update

from model.db import DB_ENGINE, rawcontents
from utils.log import getLogger

logger = getLogger('semiTrain')


def fetchAllData(threshold):
    """Load rid, tag and assure for rows whose content is longer than ``threshold``.

    The result is whatever ``pd.read_sql`` returns for the query on DB_ENGINE.
    """
    query = 'SELECT rid, tag, assure FROM rawcontents WHERE LENGTH(content) > {}'
    # threshold is formatted directly into the SQL text.
    return pd.read_sql(query.format(threshold), DB_ENGINE)


def randomSelectData(data, count):
    """Return a copy of up to ``count`` rows of ``data`` in random order.

    Row positions are shuffled with NumPy's global random state and the first
    ``count`` of them are selected by position.
    """
    positions = np.arange(len(data))
    np.random.shuffle(positions)
    chosen = positions[:count]
    picked = data.iloc[chosen]
    return picked.copy()


def completeTrainData(current):
    stmt = 'SELECT rid, content, vector FROM rawcontents WHERE rid IN {}'
コード例 #46
0
    def __init__(self, data_dict):
        """Wrap ``data_dict`` as this object's data; the dict is stored, not copied."""
        self.callbacks = []

        self.logger = getLogger("Data")
        self.data = data_dict
コード例 #47
0
    def __init__(self):
        """Import the modules this reader needs and trace that they loaded.

        The imports are local to the constructor, so a missing module raises
        ImportError only when an instance is created.
        """
        self.log = getLogger("GetchUnix")
        import tty
        import sys

        self.log.trace(_("Loaded: %s, %s") % (tty, sys))
コード例 #48
0
ファイル: db.py プロジェクト: LuminousXLB/CrawlerTY
from collections import namedtuple

from sqlalchemy import MetaData, create_engine
from sqlalchemy.schema import (Column, ForeignKey, ForeignKeyConstraint, Index,
                               PrimaryKeyConstraint, Table, UniqueConstraint)
from sqlalchemy.types import BLOB, Boolean, DateTime, Float, Integer, String

from settings import DB_ENGINE_FILE, ECHO_DATABASE_INFO
from utils.log import getLogger

logger = getLogger('db')

# models

# SQLite engine on the file named by DB_ENGINE_FILE; SQL echoing follows
# the ECHO_DATABASE_INFO setting.
DB_ENGINE = create_engine('sqlite:///{}'.format(DB_ENGINE_FILE),
                          echo=ECHO_DATABASE_INFO)

# Registry that the Table definitions below attach to.
metadata = MetaData()

posts = Table(
    'posts', metadata, Column('pid', Integer, autoincrement=True),
    Column('blockid', String, nullable=False, comment='板块id'),
    Column('postid', Integer, nullable=False, comment='帖子id'),
    Column('title', String, nullable=False, comment='帖子标题'),
    Column('pageurl', String, nullable=False, comment='帖子首页url'),
    Column('subType', String, comment='帖子子类型'),
    Column('activityuserid', Integer, nullable=False, comment='楼主id'),
    Column('clickcount', Integer, nullable=False, comment='点击数'),
    Column('replycount', Integer, nullable=False, comment='回复数'),
    Column('remarkcount', Integer, nullable=False, comment='楼主发言数'),
    Column('imgcount', Integer, nullable=False, comment='图片数'),
コード例 #49
0
ファイル: tokens.py プロジェクト: gsingh123/Ultros
    def __init__(self):
        """Compile the regular expressions used for token handling."""
        # A "{...}" group without a closing brace inside it.
        self.token_regex = re.compile(r"\{[^}]*\}")
        # A ":" that is not preceded by a backslash.
        self.parse_regex = re.compile(r"(?<!\\):")
        # A backslash-escaped ":".
        self.escape_regex = re.compile(r"\\:")

        self.logger = getLogger("Tokens")
コード例 #50
0
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.test.utils import common_texts
from sqlalchemy import select

from model.db import DB_ENGINE, posts, rawcontents
from utils.log import getLogger

# Module-level logger named 'doc2vec'.
logger = getLogger('doc2vec')

# init

# Build the training corpus: one TaggedDocument per row returned by the
# `posts.title` query. The tag is the row's position in the result set
# (the `enumerate` index), not a column read from the database.
# NOTE(review): `doc` here is the whole result row, not a list of tokens;
# confirm whether the title should be tokenized before being passed as the
# TaggedDocument words.
with DB_ENGINE.connect() as conn:
    s = select([posts.c.title])
    documents = [
        TaggedDocument(doc, [pid]) for pid, doc in enumerate(conn.execute(s))
    ]

# Train the model on the corpus built above.
model = Doc2Vec(documents, vector_size=2048, window=5, min_count=1, workers=4)

# Persist the trained model to disk ...
model.save('RuntimeTY/d2v_2048_5_1216')

# ... and immediately reload it from the same path, rebinding `model`.
model = Doc2Vec.load('RuntimeTY/d2v_2048_5_1216')

# Logged at CRITICAL level once the model has been reloaded.
logger.critical('Loaded')

with DB_ENGINE.connect() as conn:
    s = select([rawcontents])
    buffer = []

    for row in conn.execute(s):
        rid = row[rawcontents.c.rid]
コード例 #51
0
 def __init__(self):
     """Create the "Help" logger and register the two generic topics.

     The "aliases" topic is added first, then the "commands" topic; both are
     created with ``HelpTopicType.GenericTopic``.
     """
     self.logger = getLogger("Help")

     aliases_topic = AliasListTopic("aliases", HelpTopicType.GenericTopic)
     self.add_topic(aliases_topic)

     commands_topic = ComandListTopic("commands", HelpTopicType.GenericTopic)
     self.add_topic(commands_topic)
コード例 #52
0
    def __init__(self):
        """Create the "GetchWindows" logger, import ``msvcrt``, and log it.

        ``msvcrt`` is imported inside the constructor rather than at module
        level, so an ImportError from it propagates out of the constructor.
        """
        self.log = getLogger("GetchWindows")

        import msvcrt

        # The module object itself is interpolated into the translated message.
        loaded_message = _("Loaded: %s") % msvcrt
        self.log.trace(loaded_message)
コード例 #53
0
ファイル: mainpart.py プロジェクト: LuminousXLB/CrawlerTY
import logging
import re
from urllib import parse

import demjson

from model.db import Reply
from utils.datetime import parseDatetimeString
from utils.log import getLogger
from utils.request import getSoup, with_max_retries

# Module-level logger named 'parser'. logging.INFO is passed as the second
# argument (presumably the level threshold; confirm against utils.log.getLogger).
logger = getLogger('parser', logging.INFO)


class FetchPostFailed(BaseException):
    """Error type for a failed post fetch (purpose inferred from the name).

    It derives directly from BaseException rather than Exception, so
    ``except Exception`` handlers do not catch it.
    """


# app


@with_max_retries(3, 10)
def extractAll(blockid, postid):
    url = urlFactory(blockid, postid, 1)
    soup = getPage(url)
    if soup == None:
        return None

    bbsGlobal = extractBBSGlobal(soup)
    if bbsGlobal['isWenda'] or bbsGlobal['subType'] == '本版隐藏':
        logger.info('Got subtype with {} in {}'.format(bbsGlobal['subType'],
コード例 #54
0
from utils.log import getLogger
import pandas as pd
from model.db import DB_ENGINE
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score
import numpy as np
import pickle
import os
from sklearn import ensemble, svm

# Module-level logger named 'predict'.
logger = getLogger('predict')

# Load the rows of `rawcontents` whose `assure` value is above 0.5.
labeled = pd.read_sql(
    'SELECT rid, tag, vector FROM rawcontents WHERE assure>0.5', DB_ENGINE)

# Collapse rows that share the same `vector` value into one sample whose
# label is the mean of their `tag` values.
train = labeled.groupby('vector')['tag'].mean()

# Each distinct `vector` value is unpickled to obtain the feature vector.
# NOTE(review): pickle.loads executes arbitrary code on malicious input; this
# is only safe if the database contents are trusted.
X = list(pd.Series(train.index).apply(pickle.loads))
# NOTE(review): the mean is fractional when the tags for one vector disagree;
# confirm that such values are acceptable as SVC class labels.
y = list(train.values)

logger.info('Build the model')

clf = svm.SVC(kernel='rbf')

logger.info('Fitting...')

clf.fit(X, y)

# Persist the fitted classifier with pickle.
# NOTE(review): DATA_ROOT is neither defined nor imported in the visible part
# of this script; confirm where it comes from.
with open(DATA_ROOT / 'SVC_rbf_model.bin', 'wb') as f:
    pickle.dump(clf, f)
コード例 #55
0
ファイル: DBConnection.py プロジェクト: marcus604/Photoshare
import pymysql
import configparser
import logging
from argon2 import PasswordHasher
from classes.User import User
from classes.PSAlbum import PSAlbum
from utils.log import getConsoleHandler, getFileHandler, getLogger
import time


# Module-level logger named after this module. The second argument is a path
# string (presumably the log file; confirm against utils.log.getLogger).
psLogger = getLogger(__name__, "logs/photoshare.log")
# Emitted once, when this module is first imported.
psLogger.debug("Loading DBConnection class")

class dbConnection:
    """Holds SQL connection settings read from a configuration object.

    The class-level attributes are empty-string placeholders. ``__init__``
    overrides all of them on the instance except ``SQL_CONNECTION``.
    """

    USERNAME = ''
    PASSWORD = ''
    HOST = ''
    DATABASE_NAME = ''
    CHARSET = ''
    SQL_CONNECTION = ''

    def __init__(self, settings):
        """Copy the connection options from the 'SQL' section of ``settings``.

        Args:
            settings: Object exposing ``get(section, option)``; it is queried
                for 'host', 'user', 'password', 'dbName' and 'charset'.
        """
        # (instance attribute, option name) pairs, in lookup order.
        attribute_options = (
            ('HOST', 'host'),
            ('USERNAME', 'user'),
            ('PASSWORD', 'password'),
            ('DATABASE_NAME', 'dbName'),
            ('CHARSET', 'charset'),
        )
        for attribute, option in attribute_options:
            setattr(self, attribute, settings.get('SQL', option))

コード例 #56
0
from utils.log import getLogger, logging
from utils.request import fetchJson, with_max_retries
import json

# Module-level logger named 'reward'. logging.INFO is passed as the second
# argument (presumably the level threshold; confirm against utils.log.getLogger).
logger = getLogger('reward', logging.INFO)


class FetchRewardInfoFailed(BaseException):
    """Error type for a failed reward-info fetch (purpose inferred from the name).

    It derives directly from BaseException rather than Exception, so
    ``except Exception`` handlers do not catch it.
    """


# app


def fetchRewardInfo(bbsGlobal):
    """Build the form for the `bbs.api.getArticleDashangInfo` API method.

    Args:
        bbsGlobal: Mapping read for the keys 'item', 'artId',
            'tyfen_rewardIds', 'tyfen_tyfIds' and 'shangIds'.
    """
    # NOTE(review): the visible body only assembles `form`; whatever sends it
    # is not shown here.
    form = {
        'method': 'bbs.api.getArticleDashangInfo',
        'params.item': bbsGlobal['item'],
        'params.articleId': bbsGlobal['artId'],
    }

    # Maps each form field to the bbsGlobal key that supplies its value.
    for kform, kglob in {
            'params.rewardIds': 'tyfen_rewardIds',
            'params.tyfIds': 'tyfen_tyfIds',
            'params.shangIds': 'shangIds'
    }.items():
        # A non-empty value whose first element is '0' is passed through
        # unchanged; anything else (including an empty value) gets '0,'
        # prepended.
        if len(bbsGlobal[kglob]) > 0 and bbsGlobal[kglob][0] == '0':
            form[kform] = bbsGlobal[kglob]
        else:
            form[kform] = '0,' + bbsGlobal[kglob]
コード例 #57
0
    def __init__(self, data_dict):
        """Wrap ``data_dict`` and set up the instance's bookkeeping attributes.

        Args:
            data_dict: Object stored as ``self.data`` without being copied.
        """
        # The caller's object is kept by reference.
        self.data = data_dict
        self.exists = True

        # Starts out as an empty list.
        self.callbacks = []
        self.logger = getLogger("MemoryConfig")
コード例 #58
0
Author: Hai Liang Wang <*****@*****.**>
'''

import os
import tensorflow as tf
import shutil
from config import Config
from tqdm import tqdm
from utils import log
from munch import munchify
from models.rnn import Model
from dataset.textdata import TextData
from time import localtime, strftime

# Module-level Config instance read by main() below.
config = Config()
# Module-level logger named after this module.
logger = log.getLogger(__name__)


def main(unused_argv):
    """Construct the TextData object from values on the module-level ``config``.

    Args:
        unused_argv: Not referenced in the visible part of this function.
    """
    # The options are collected in a plain dict and passed through munchify
    # before being handed to TextData.
    batch_data = TextData(
        munchify({
            'rootDir': config.root_dir,
            'corpus': config.corpus_name,
            'maxLength': config.train_max_length,
            'maxLengthEnco': config.train_max_length_enco,
            'maxLengthDeco': config.train_max_length_deco,
            'datasetTag': '',
            'test': False,
            'watsonMode': False,
            'batchSize': config.train_num_batch_size
        }))
コード例 #59
0
ファイル: request.py プロジェクト: LuminousXLB/CrawlerTY
import json
import logging
import time
from functools import wraps

import requests
from bs4 import BeautifulSoup

from utils.log import getLogger

# Module-level logger named 'request'. logging.INFO is passed as the second
# argument (presumably the level threshold; confirm against utils.log.getLogger).
logger = getLogger('request', logging.INFO)

# A single requests.Session created at import time, with a fixed desktop
# Chrome User-Agent header set on it.
session = requests.Session()
session.headers[
    'User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'

# functions


def lauchRequest(req_func, ret_func, sleep_time):
    """Wrap a request callable so every call is delayed, logged and post-processed.

    Args:
        req_func: Callable that performs the request. The object it returns
            must expose ``status_code``, ``url`` and ``request.method``.
        ret_func: Callable applied to the response to produce the result.
        sleep_time: Default number of seconds to sleep before each request.

    Returns:
        A function ``func(*args, sleep_time=sleep_time, **kwargs)`` that
        sleeps for ``sleep_time`` seconds, calls ``req_func(*args, **kwargs)``
        and returns the tuple ``(ret_func(rsp), rsp)``.
    """
    def func(*args, sleep_time=sleep_time, **kwargs):
        time.sleep(sleep_time)
        # Keyword arguments are forwarded so callers can pass options such as
        # ``timeout=`` or ``params=`` through to the request callable.
        rsp = req_func(*args, **kwargs)

        # Lazy %-style arguments: the message is only formatted when DEBUG
        # records are actually emitted.
        logger.debug('Return %s from %s %s', rsp.status_code,
                     rsp.request.method, rsp.url)

        return ret_func(rsp), rsp

    return func