Example #1
	def listen(self):
		while True:
			if not self.connected:
				self.connect()
			if self.connected:
				self.sleep_time = 5
				r = self.sock.recv(1024*1024*10).decode("utf-8")
				if r == '':
					self.connected = False
					logger.warning("connection zum backend gestorben")
					time.sleep(1)
					continue
				## the backend may deliver fragmented blocks: one JSON message per line
				for d in r.split("\n"):
					if d == '':
						continue
					try:
						self.parse(json.loads(d))
					except Exception:
						logger.exception("failed to parse backend message")
			else:
				self.sleep_time = min(self.sleep_time * 3, 60*60*3)
				if not self.suppress_connection_warnings:
					logger.debug("No connection to Backend; sleeping " + str(self.sleep_time) + " seconds.")
				time.sleep(self.sleep_time)
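The reconnect branch above backs off geometrically (sleep_time * 3) and caps the wait at three hours. A minimal standalone sketch of that backoff pattern, with illustrative names that are not from the original class:

import time

def backoff_delays(base=5, factor=3, cap=3 * 60 * 60):
    """Yield successive reconnect delays: base, base*factor, ... capped at `cap` seconds."""
    delay = base
    while True:
        yield delay
        delay = min(delay * factor, cap)

# illustrative usage (try_connect is a placeholder):
# for delay in backoff_delays():
#     if try_connect():
#         break
#     time.sleep(delay)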
Example #2
 def launch(self, demo):
     """
     Update records for demo launches
     :param demo: name of the launched demo
     :return:
     """
     try:
         self.cassandra_cluster.session.execute(
             self.cassandra_cluster.insert_launch_statement.bind((
                 self.today,
                 self.request_timeuuid,
                 demo,
                 self.user,
                 self.form_variables,
                 datetime.datetime.now()
             ))
         )
         self.cassandra_cluster.session.execute(
             self.cassandra_cluster.insert_demo_launch_statement.bind((
                 demo,
                 self.request_timeuuid,
                 self.user,
                 self.form_variables,
                 datetime.datetime.now()
             ))
         )
     except:
         logger.exception('Database inaccessible!')
Example #3
 def delete_dest(self):
     #cleanup destination file
     try:
         if os.path.exists(self.destpath):
             os.remove(self.destpath)
     except:
         log.exception("Couldn't clean up file %s", self.destpath)
Example #4
def codr_hash():
	try:
		with open(".data_and_stuff/codr_md5", "rb") as f:
			return {"md5_b64": base64.b64encode(f.read()).decode("ascii")}, 200
	except Exception as e:
		logger.exception(e)
		return {}, 500
Example #5
def cookie2user(cookie_str):
	if not cookie_str:
		return None
	try:
		L = cookie_str.split('-')
		if len(L) != 3:
			# if it doesn't have 3 elements, it can't match the sha1 string we built, return None
			return None
		uid, expires, sha1 = L
		# unpack the user id, expiry timestamp and sha1 digest
		if int(expires) < time.time():
			# if expired (older than one day), return None
			return None
		user = yield from User.find(uid)
		# look the user up by id (the primary key) to check that the user exists
		if user is None:
			return None
		s = '%s-%s-%s-%s' % (uid, user.passwd, expires, _COOKIE_KEY)
		# rebuild the verification sha1 string from the stored user data
		if sha1 != hashlib.sha1(s.encode('utf-8')).hexdigest():
			logger.info('invalid sha1')
			return None

		user.passwd = '*******'
		return user
	except Exception as e:
		logger.exception(e)
		return None
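For context, a sketch of the cookie-creation side that cookie2user verifies, assuming the same _COOKIE_KEY secret, the uid-expires-sha1 layout checked above, and a user object with id and passwd attributes:

import hashlib
import time

_COOKIE_KEY = 'change-me'  # assumed shared secret, same value the verifier uses

def user2cookie(user, max_age=86400):
    # build the 'uid-expires-sha1' string that cookie2user() splits and verifies
    expires = str(int(time.time() + max_age))
    s = '%s-%s-%s-%s' % (user.id, user.passwd, expires, _COOKIE_KEY)
    return '-'.join([user.id, expires, hashlib.sha1(s.encode('utf-8')).hexdigest()])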
Example #6
    def get_articles(self):
        resultList = []
        sections = [('要聞', 'http://toronto.singtao.ca/category/%e8%a6%81%e8%81%9e/?variant=zh-hk'),
                    ('城市', 'http://toronto.singtao.ca/category/%e5%9f%8e%e5%b8%82/?variant=zh-hk'),
                    ('加國', 'http://toronto.singtao.ca/category/%e5%8a%a0%e5%9c%8b/?variant=zh-hk'),
                    ('國際', 'http://toronto.singtao.ca/category/%e5%9c%8b%e9%9a%9b/?variant=zh-hk'),
                    ('港聞', 'http://toronto.singtao.ca/category/%e6%b8%af%e8%81%9e/?variant=zh-hk'),
                    ('中國', 'http://toronto.singtao.ca/category/%e4%b8%ad%e5%9c%8b/?variant=zh-hk'),
                    ('台灣', 'http://toronto.singtao.ca/category/%e5%8f%b0%e7%81%a3/?variant=zh-hk'),
                    ('體育', 'http://toronto.singtao.ca/category/%e9%ab%94%e8%82%b2/?variant=zh-hk'),
                    ('財經', 'http://toronto.singtao.ca/category/%e8%b2%a1%e7%b6%93/?variant=zh-hk'),
                    ('娛樂', 'http://toronto.singtao.ca/category/%e5%a8%9b%e6%a8%82/?variant=zh-hk'),]

        try:
            for (title, url) in sections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = html.document_fromstring(read_http_page(url))
                for option in doc.get_element_by_id('news').xpath('option'):
                    if option.text and option.get('value'):
                        resultList.append(self.create_article(option.text.strip(), option.get('value')))


        except Exception as e:
            logger.exception('Problem processing url')

        return resultList
Example #7
File: api.py Project: jun9/mcu-api
    def request(self, method_name, params):
        """
            Send a request to Codian MCU API, adding username and password
        """
        try:
            logger.debug("API Request - Method: %s - Params: %s" % (method_name, params))
            # add authentication params to the request
            params = dict(params.items() + self._auth_data.items())
            # convert dict to xml for the request
            xmlrequest = xmlrpclib.dumps(tuple([params]), method_name)
            logger.debug("%s" % xmlrequest)

            conn = httplib.HTTPSConnection(conf.MCU_API_HOSTNAME)
            # add the correct headers
            headers = {"Content-type": "text/xml", "charset": "utf-8", "Content-Length": "%d" % len(xmlrequest)}
            # send the request to the API url
            conn.request("POST", conf.MCU_API_URL, headers=headers)
            conn.send(xmlrequest)
            # get the response
            response = conn.getresponse()
            response = response.read()
            logger.debug("Response: %s" % response)
            # close connection
            conn.close()

            # convert xml response to dict
            return xmlrpclib.loads(response)
        except xmlrpclib.Fault as err:
            logger.exception("XMLRPC request FAILED using Codian MSE API: %s" % err)
        except Exception as err:
            logger.exception("XMLRPC exception using Codian MSE API: %s" % err)
Example #8
    def get_articles(self):
        resultList = []
        sections = [('Vancouver', 'http://www.theprovince.com/scripts/Sp6Query.aspx?catalog=VAPR&tags=category|news|subcategory|metro%20vancouver'),
                    ('Fraser Valley', 'http://www.theprovince.com/scripts/Sp6Query.aspx?catalog=VAPR&tags=category|news|subcategory|fraser%20valley'),
                    ('B.C.', 'http://www.theprovince.com/scripts/Sp6Query.aspx?catalog=VAPR&tags=category|news|subcategory|b.c.'),]
        relSections = [('Canada', 'http://www.theprovince.com/7588609.atom'),
                    ('World', 'http://www.theprovince.com/7589147.atom'), ]

        try:
            for (title, url) in sections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = etree.fromstring(read_http_page(url))
                for entry in doc.xpath('//ns:entry[@Status="FREE"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'}):
                    title = entry.xpath('ns:title[@type="html"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'})[0].text
                    link = 'http://www.theprovince.com' + entry.xpath('ns:link[@type="text/html"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'})[0].get('href')
                    abstract = entry.xpath('ns:link[@type="text/html"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'})[0].get('Abstract')
                    resultList.append(self.create_article(title.strip(), link, abstract))

            for (title, url) in relSections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = etree.fromstring(read_http_page(url))
                for entry in doc.xpath('//ns:entry[@Status="FREE"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'}):
                    title = entry.xpath('ns:title[@type="html"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'})[0].text
                    link = 'http://www.theprovince.com' + entry.xpath('ns:link[@type="text/xml"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'})[0].get('href')
                    abstract = entry.xpath('ns:link[@type="text/xml"]', namespaces={'ns': 'http://www.w3.org/2005/Atom'})[0].get('Abstract')
                    resultList.append(self.create_article(title.strip(), link, abstract))

        except Exception as e:
            logger.exception('Problem processing url')

        return resultList
Example #9
 def post(self, host_ip):
     error = None
     use = self.get_argument("use", "").strip()
     interface_index = self.get_argument("interface_index", "").strip()
     in_warn = self.get_argument("in_warn", "").strip()
     out_warn = self.get_argument("out_warn", "").strip()
     in_crit = self.get_argument("in_crit", "").strip()
     out_crit = self.get_argument("out_crit", "").strip()
     
     snmp_supported, community = nagios.check_if_snmp_supported(host_ip)
     if not snmp_supported:
         error = "failed"
         logger.error("host %s does not support SNMP!" % host_ip)
         all_interface = self.get_all_interface(host_ip, community)
         return self.render("infrastracture/add_service_data_traffic.html",
                            error=error, host_ip=host_ip,
                            snmp_supported=False,
                            all_interface=all_interface)
         
     try:
         speed, status, name, index = snmp.get_int_status(host_ip, community,
                                                          interface_index=interface_index)
     except Exception, e:
         logger.exception(e)
         error = "failed"
         return self.render("infrastracture/add_service_data_traffic.html",
                             error=error, host_ip=host_ip)
Example #10
 def login(self, no_gui=False):
     try:
         self._login_by_cookie()
     except CookieLoginFailed:
         logger.info("Cookie login failed.")
         while True:
             if self._login_by_qrcode(no_gui):
                 if self._login_by_cookie():
                     break
             time.sleep(4)
     user_info = self.get_self_info()
     self.get_online_friends_list()
     self.get_group_list_with_group_id()
     self.get_group_list_with_group_code()
     try:
         self.username = user_info['nick']
         logger.info(
             "User information got: user name is [%s]" % self.username
         )
     except KeyError:
         logger.exception(
             "User info access failed, check your login and response:\n%s"
             % user_info
         )
         exit(1)
     logger.info("RUNTIMELOG QQ:{0} login successfully, Username:{1}".format(self.account, self.username))
Example #11
    def get_historical_data(self, market, period, unit):
        """
        Queries the historical data in the form of a list

        :param market: String literal for the market (ex: BTC-LTC)
        :type market: str
        :param period: Number of periods to query
        :type period: int
        :param unit: Ticker interval (one of: 'oneMin', 'fiveMin', 'thirtyMin', 'hour', 'week', 'day', and 'month')
        :type unit: str

        :return: List adapted from Bittrex JSON response
        :rtype : list
        """
        request_url = "https://bittrex.com/Api/v2.0/pub/market/GetTicks?marketName={}&tickInterval={}".format(market,
                                                                                                              unit)

        try:
            historical_data = requests.get(request_url,
                                           headers={"apisign": hmac.new(self.api_secret.encode(), request_url.encode(),
                                                                        hashlib.sha512).hexdigest()}
                                           ).json()
            return historical_data["result"][-period:]
        except (json.decoder.JSONDecodeError, TypeError) as exception:
            logger.exception(exception)
            return []
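A usage sketch based on the docstring above, assuming the enclosing client class has been instantiated as `client` with a valid API secret:

# fetch the last 50 five-minute candles for the BTC-LTC market
candles = client.get_historical_data('BTC-LTC', period=50, unit='fiveMin')
for candle in candles:
    print(candle)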
Example #12
    def uin_to_account(self, tuin):
        """
        Convert a uin to the user's QQ number
        :param tuin:
        :return: str, the user's QQ number
        """
        uin_str = str(tuin)
        try:
            logger.info("RUNTIMELOG Requesting the account by uin:    " + str(tuin))
            info = json.loads(
                self.client.get(
                    'http://s.web2.qq.com/api/get_friend_uin2?tuin={0}&type=1&vfwebqq={1}&t={2}'.format(
                        uin_str,
                        self.vfwebqq,
                        self.client.get_timestamp()
                    ),
                    self.smart_qq_refer
                )
            )
            logger.debug("RESPONSE uin_to_account html:    " + str(info))
            if info['retcode'] != 0:
                raise TypeError('uin_to_account retcode error')
            info = info['result']['account']
            return info

        except Exception:
            logger.exception("RUNTIMELOG uin_to_account fail")
            return None
Example #13
 def run(self):
     while True:
         try:
             self.job()
         except Exception as e:
             logger.exception('Job failed: %s, %s' % (type(e), str(e)))
             pass
         time.sleep(self.period)
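logger.exception() already appends the full traceback, so interpolating type(e) and str(e) into the message mostly duplicates information. A minimal, self-contained equivalent of the loop above with a plain message:

import logging
import time

logger = logging.getLogger(__name__)

def run_forever(job, period):
    # run `job` every `period` seconds; log the traceback on failure and keep going
    while True:
        try:
            job()
        except Exception:
            logger.exception('Job failed')  # traceback is attached automatically
        time.sleep(period)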
Example #14
def run_GUI():
    check_system()
    try:
        from GUI import TickeysApp
        TickeysApp().run()
    except Exception, e:
        logger.error("Failed to run GUI, reason:")
        logger.exception(e)
        os._exit(0)
Example #15
def connect_to_xenserver():
    for host in settings.XEN:
        with Timeout(1.0):
            try:
                proxy = xmlrpclib.ServerProxy("http://" + host[0])
                result = proxy.session.login_with_password(host[1], host[2])
                session_id = result['Value']
                global_xenserver_conn[host[0]] = session_id
            except Exception, e:
                logger.exception(e)
Example #16
def excel_write(filepath, worksheet, row, column, value):
    try:
        logger.info("excel path: %s, write location(%s, %s) and value is: %s",
                    filepath, row, column, value)
        xfile = openpyxl.load_workbook(filepath)
        sheet = xfile.get_sheet_by_name(worksheet)
        sheet[row + column] = value
        xfile.save(filepath)
    except Exception as ex:
        logger.exception(ex)
Example #17
def show_notify(notify_content=""):
    try:
        notify2.init('Tickeys')
        title = 'Tickeys'
        icon_file_path = os.getcwd() + '/tickeys.png'
        notify = notify2.Notification(title, notify_content, icon_file_path)
        notify.show()
    except Exception, e:
        logger.exception(e)
        logger.error("show notify fail")
Example #18
 def GET_LogonHtml(self):
     auth = tweepy.OAuthHandler(os.environ["consumer_key"], os.environ["consumer_secret"], callback="http://localhost:8888/callback.html", secure=True)
     try:
         auth_url = auth.get_authorization_url()
         session = self.server.getSession(self)
         session['request_token'] = {"key": auth.request_token.key, "secret": auth.request_token.secret}
         self.__sendRedirect(auth_url)
     except TweepError as e:
         logger.exception(e)
         self.__sendRedirect("/")
Example #19
    def __getTimelineFeatures(self, timeline):
        logger.info(u"Get timeline features")
        tweets = []
        self.__changePhase(PHASE["GET_TIMELINE_URLS"])
        for t in timeline:
            try:
                tweet = TweetText(t, self.__urlBuilder, self.__userBuilder)
            except:
                logger.exception(u"Error: \"" + unicode(t) + u"\"")
                raise ValueError(t)
            logger.debug(u"Tweet:" + unicode(tweet))
            tweets.append(tweet)

        urls = []
        ti = 0
        for tweet in tweets:
            for url in tweet.urls():
                self.__breakIfStopped()
                self.__urlResolver.addUrlToQueue(url)
                urls.append(url)
            logger.info(u"Tweet:" + unicode(tweet))
            ti += 1
            self.__proc = 100 * float(ti) / float(len(tweets))

        # Categories
        self.__changePhase(PHASE["GET_TIMELINE_FEATURES"])
        url2labels = {}
        ui = 0
        for url in urls:
            self.__breakIfStopped()
            if not url.isError():
                logger.debug(u"Classify " + unicode(url.getUrl()))
                url2labels[url.getExpandedUrl()] = self._classifier().classify(url.getText())
            ui += 1
            self.__proc = 100 * float(ui) / float(len(urls))

        labelsFreq = FreqDist()
        for labels in url2labels.values():
            for label in labels:
                labelsFreq.inc(label)
        self.__catFreq = labelsFreq.items()
        logger.info(u"Categories: "  + unicode(labelsFreq.items()))
        labelsFreqValues = [(item[0], item[1]) for item in labelsFreq.items() if item[0] not in ['short', 'medium', 'long']]
        # normalization
        labelsFreqValues = {label: float(freq) / float(max([f for l,f in labelsFreqValues])) for label, freq in labelsFreqValues}
        logger.info(u"Category factors: "  + unicode(labelsFreqValues))

        # Languages
        langFreq = FreqDist()
        for u in urls:
            langFreq.inc(u.lang())
        self.__langFreq = langFreq.items()
        logger.info(u"Languages: " + unicode(langFreq.items()))

        return labelsFreqValues
Example #20
 def connect(self):
     '''
     Return the MongoDB connection object
     '''
     try:
         self.connection = pymongo.MongoClient(host=self.mongo_uri,
                                             max_pool_size=self.conn_pool_size,
                                             connectTimeoutMS=self.conn_timeout)
     except Exception, e:
         logger.exception(e)
         raise
Example #21
 def update(self, collection, condition, contents):
     '''
     Update records
     condition is the query filter, a dict
     contents holds the fields to update
     '''
     if not condition or not isinstance(condition, dict):
         error = RuntimeError("Need *condition* parameter")
         logger.exception(error)
         raise error
     self.get_collection(collection).update(condition, {"$set": contents})
Example #22
        def __init__(self, cassandra_cluster, user,
                     endpoint, method, form_variables, get_variables,
                     init_log):
            self.cassandra_cluster = cassandra_cluster
            self.user = user

            # https://datastax-oss.atlassian.net/browse/PYTHON-212
            self.today = datetime.datetime.combine(datetime.date.today(),
                                                   datetime.datetime.min.time())
            self.request_timeuuid = generate_timeuuid()

            self.form_variables = _sanatize(form_variables)
            self.get_variables = _sanatize(get_variables)

            if not init_log:
                return

            # store first record of endpoint access
            try:
                self.cassandra_cluster.session.execute(
                    self.cassandra_cluster.insert_access_statement.bind((
                        self.today,
                        self.request_timeuuid,
                        self.request_timeuuid,
                        self.user,
                        'init',
                        endpoint,
                        method,
                        self.form_variables,
                        self.get_variables,
                        None
                    ))
                )
                self.cassandra_cluster.session.execute(
                    self.cassandra_cluster.insert_user_access_statement.bind((
                        self.user,
                        self.request_timeuuid,
                        self.request_timeuuid,
                        'init',
                        endpoint,
                        method,
                        self.form_variables,
                        self.get_variables,
                        None
                    ))
                )
                self.cassandra_cluster.session.execute(
                    self.cassandra_cluster.insert_last_seen_statement.bind((
                        self.user,
                        datetime.datetime.now()
                    ))
                )
            except:
                logger.exception('Database inaccessible!')
Example #23
def connect_to_xenserver():
    for host in XEN:
        if host[0] not in global_xenserver_conn:
            try:
                transport = TimeoutTransport()
                session = XenAPI.Session("http://" + host[0], transport)
                session.login_with_password(host[1], host[2])
                global_xenserver_conn[host[0]] = session
                logger.warn("Connect to XenServer: {0} are success(with timeout).".format(host[0]))
            except Exception, e:
                logger.exception(e)
Example #24
 def read_head(self):
     # data sent through the pipe is wrapped with a simple protocol
     self.pack_size = binproto.get_pack_size()
     try:
         self.head = os.read(self.fd, self.pack_size)
     except Exception as e:
         if e[0] in (errno.EWOULDBLOCK, errno.EAGAIN):
             self.ioloop.add_handler(self.fd, self.data_processor, self.ioloop.READ)
         else:
             logger.exception(e)
     else:
         self.process_head()
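read_head() above indexes the exception (e[0]) to read the errno, which only works on Python 2. A hedged Python 3 sketch of the same non-blocking read, with the tornado ioloop wiring omitted:

import errno
import logging
import os

logger = logging.getLogger(__name__)

def try_read(fd, size):
    # read up to `size` bytes from a non-blocking fd; return None when no data is ready yet
    try:
        return os.read(fd, size)
    except OSError as e:
        if e.errno in (errno.EWOULDBLOCK, errno.EAGAIN):
            return None  # caller should keep the fd registered and retry later
        logger.exception("pipe read failed")
        raise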
Example #25
 def initialise(self):
     """
     Fetch the initial coin pairs to track and to print the header line
     """
     try:
         if len(self.Database.app_data["coinPairs"]) < 1:
             self.Database.store_coin_pairs(self.get_markets("BTC"))
         self.Messenger.print_header(len(self.Database.app_data["coinPairs"]))
     except ConnectionError as exception:
         self.Messenger.print_exception_error("connection")
         logger.exception(exception)
         exit()
Example #26
 def GET_CallbackHtml(self):
     try:
         callback = self.scm + "://" + self.netloc + "callback.html"
         auth = tweepy.OAuthHandler(os.environ["consumer_key"], os.environ["consumer_secret"], callback=callback, secure=True)
         session = self.server.getSession(self)
         rt = session['request_token']
         auth.set_request_token(rt["key"], rt["secret"])
         verifier = self.params["oauth_verifier"][0]
         session["token"] = auth.get_access_token(verifier)
         self.__loadUser()
     except:
         logger.exception("Error")
     self.__sendRedirect("/")
Example #27
 def runPart(self):
     userFeatures = self.__userMgr.doJob(self.__token, self.__userId, self, self.__screenName)
     sessionKey = "features-" + userFeatures.screenName()
     self.__session[sessionKey] = userFeatures
     logger.info("Store user features in session[\"" + sessionKey +"\"")
     try:
         userFeatures.doJob()
     except NothingToDo as e:
         raise e
     except BaseException as e:
         logger.exception(u"UserFeatures.doJob error for " + unicode(self.__userId) + u" " + unicode(self.__screenName))
         raise e
     raise NothingToDo()
Example #28
 def process_head(self):
     if len(self.head) == self.pack_size:
         try:
             self.source, self.obj_id, self.body_length = binproto.unpack(self.head)
         except Exception as e:
             logger.exception(e)
         else:
             logger.info("Got head: (%s %s %s)" % (self.source, self.obj_id, self.body_length))
             self.read_body()
     else:
         logger.error("Error occurred while read head, close the pipe.")
         self.ioloop.remove_handler(self.fd)
         self.close_fd()
Example #29
 def decrypt(self):
     if encrypted:
         cipher = AES.new(getpass.getpass("Input decryption password (string will not show)"))
         try:
             self.api_key = ast.literal_eval(self.api_key) if type(self.api_key) == str else self.api_key
             self.api_secret = ast.literal_eval(self.api_secret) if type(self.api_secret) == str else self.api_secret
         except Exception:
             logger.exception("Failed to parse stored API credentials")
             pass
         self.api_key = cipher.decrypt(self.api_key).decode()
         self.api_secret = cipher.decrypt(self.api_secret).decode()
     else:
         raise ImportError("`pycrypto` module has to be installed")
Example #30
 def __fixRepo(self):
     for filename, url in self.__filenameToUrl.iteritems():
         try:
             txtfullpath = os.path.join(self.__dir, filename)
             htmlfullpath = txtfullpath[:-4] + ".html"
             if not os.path.exists(htmlfullpath):
                 logger.info("Download: " + url + " to " + htmlfullpath)
                 content, url = self.__download(url)
                 f = open(htmlfullpath, "w")
                 f.write(content.data())
                 f.close()
         except:
             logger.exception("Error")
Example #31
    def get_articles(self):
        # get date first
        dateUrl = 'http://www.mingpaocanada.com/TOR/'
        theDate = datetime.datetime.today().strftime('%Y%m%d')
        try:
            doc = html.document_fromstring(read_http_page(dateUrl))
            for aLink in doc.get_element_by_id('mp-menu').xpath(
                    '//div/ul/li/a'):
                if aLink.text_content() == u'明報首頁':
                    href = aLink.attrib['href']
                    match = re.match(r'htm/News/([0-9]{8})/main_r\.htm',
                                     href)
                    if match and match.lastindex == 1:
                        theDate = match.group(1)
                    else:
                        logger.info('no date found. using system date: ' +
                                    theDate)
        except Exception as e:
            logger.exception('Problem getting date: ' + str(e))
            logger.exception(
                traceback.format_exception(etype=type(e),
                                           value=e,
                                           tb=e.__traceback__))

        resultList = []
        sections = [
            ('要聞', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/TAindex_r.htm'),
            ('加國新聞', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/TDindex_r.htm'),
            ('中國', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/TCAindex_r.htm'),
            ('國際', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/TTAindex_r.htm'),
            ('港聞', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/HK-GAindex_r.htm'),
            ('經濟', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/THindex_r.htm'),
            ('體育', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/TSindex_r.htm'),
            ('影視', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/HK-MAindex_r.htm'),
            ('副刊', 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate +
             '/WWindex_r.htm'),
        ]

        baseUrl = 'http://www.mingpaocanada.com/TOR/htm/News/' + theDate + '/'
        try:
            for (title, url) in sections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = html.document_fromstring(
                    read_http_page(url).decode('big5', errors='ignore'))
                for topic in doc.xpath(
                        '//h4[contains(@class, "listing-link")]/a'):
                    if topic.text and topic.get('href'):
                        resultList.append(
                            self.create_article(topic.text.strip(),
                                                baseUrl + topic.get('href')))

        except Exception as e:
            logger.exception('Problem processing url: ' + str(e))
            logger.exception(
                traceback.format_exception(etype=type(e),
                                           value=e,
                                           tb=e.__traceback__))

        return resultList
Example #32
    def update_offerings(self, slug, justwatch_id):
        if slug not in self.films:
            logger.warning('Could not update "%s", not in watchlist' % (slug))
            return None

        # Get offerings
        logger.info('Getting offerings for "%s" using JustWatch id=%s' %
                    (slug, justwatch_id))

        try:
            providers = {
                p['id']: p['clear_name']
                for p in self.justwatch.get_providers()
            }
            justwatch = self.justwatch.get_title(title_id=justwatch_id)
            print dumps(justwatch, indent=4)
            offers = justwatch.get('offers', [])
            justwatch_id = justwatch['id']
            justwatch_url = justwatch.get('full_paths',
                                          {}).get('MOVIE_DETAIL_OVERVIEW')
        except:
            logger.exception(
                'No offerings found for "%s" using JustWatch id=%s' %
                (slug, justwatch_id))
            return {}

        # if not offers:
        #     logger.error('No offerings found for "%s" using JustWatch id=%s' % (slug, justwatch_id))
        #     return {}

        # Parse JustWatch data
        try:
            # Offerings
            offerings = {}

            for offer in offers:
                if offer.get('provider_id') not in offerings:
                    offerings[offer.get('provider_id')] = {
                        'name': providers.get(offer.get('provider_id')),
                        'offers': [],
                        'offer_types': [],
                    }

                offerings[offer.get('provider_id')]['offers'].append({
                    'date_created':
                    offer.get('date_created'),
                    'monetization_type':
                    offer.get('monetization_type'),
                    'presentation_type':
                    offer.get('presentation_type'),
                    # 'provider_id': offer.get('provider_id'),
                    'urls':
                    offer.get('urls', {}),
                    'price':
                    offer.get('retail_price'),
                    'currency':
                    offer.get('currency'),
                })
                if offer.get('monetization_type') not in offerings[offer.get(
                        'provider_id')]['offer_types']:
                    offerings[offer.get('provider_id')]['offer_types'].append(
                        offer.get('monetization_type'))

            # Scoring
            tomato_id = None
            scoring = {}
            average_score = None
            scores = []

            for score in justwatch.get('scoring', []):
                if ':id' not in score['provider_type']:
                    key = score['provider_type'].replace(':', '_')
                    scoring[key] = score['value']

                    if key == 'imdb_score':
                        scores.append(float(score['value']))
                    if key == 'tmdb_score':
                        scores.append(float(score['value']))
                    if key == 'tomato_score':
                        scores.append((float(score['value']) / 10))
                    if key == 'metacritic_score':
                        scores.append((float(score['value']) / 10))

                if score['provider_type'] == 'tomato:id':
                    tomato_id = score['value']

            # Calculate average
            if len(scores) > 0:
                average_score = (float(sum(scores)) / len(scores))
                average_score = round(average_score, 2)

        except:
            logger.exception('Could not parse metadata for %s' % (slug))
            return {}

        # Update film
        logger.info('Updating offerings for "%s"' % (slug))

        self.films[slug]['ids']['justwatch'] = justwatch_id
        self.films[slug]['ids']['tomato'] = tomato_id
        self.films[slug]['offerings'] = offerings
        self.films[slug]['offerings_updated'] = time()
        self.films[slug]['offerings_updated_str'] = datetime.now().strftime(
            '%Y-%m-%d')
        self.films[slug]['justwatch_url'] = justwatch_url
        self.films[slug]['scoring'] = scoring
        self.films[slug]['scoring']['average'] = average_score
        self.save()

        return offerings
Example #33
def ship():
    """Returns a 'status' dict containing relevant game status information (state, fuel, ...)"""
    latest_log = get_latest_log(PATH_LOG_FILES)
    ship_status = {
        'time': (datetime.now() -
                 datetime.fromtimestamp(getmtime(latest_log))).seconds,
        'status': None,
        'type': None,
        'location': None,
        'star_class': None,
        'target': None,
        'fuel_capacity': None,
        'fuel_level': None,
        'fuel_percent': None,
        'is_scooping': False,
        'sys_fully_scanned': False
    }
    # Read log line by line and parse data
    with open(latest_log, encoding="utf-8") as f:
        for line in f:
            log = loads(line)

            # parse data
            try:
                # parse ship status
                log_event = log['event']

                if log_event == 'StartJump':
                    ship_status['status'] = str('starting_' +
                                                log['JumpType']).lower()
                    ship_status['sys_fully_scanned'] = False
                    if 'StarClass' in log:
                        ship_status['star_class'] = log['StarClass']

                elif log_event == 'SupercruiseEntry' or log_event == 'FSDJump':
                    ship_status['status'] = 'in_supercruise'

                elif log_event == 'SupercruiseExit' or log_event == 'DockingCancelled' or (
                        log_event == 'Music' and ship_status['status']
                        == 'in_undocking') or (log_event == 'Location'
                                               and log['Docked'] is False):
                    ship_status['status'] = 'in_space'

                elif log_event == 'Undocked':
                    ship_status['status'] = 'in_space'

                elif log_event == 'DockingRequested':
                    ship_status['status'] = 'starting_docking'

                elif log_event == "Music" and log[
                        'MusicTrack'] == "DockingComputer":
                    if ship_status['status'] == 'starting_undocking':
                        ship_status['status'] = 'in_undocking'
                    elif ship_status['status'] == 'starting_docking':
                        ship_status['status'] = 'in_docking'

                elif log_event == 'Docked':
                    ship_status['status'] = 'in_station'

                elif log_event == 'FSSAllBodiesFound':
                    ship_status['sys_fully_scanned'] = True

                # parse ship type
                if log_event == 'LoadGame' or log_event == 'Loadout':
                    ship_status['type'] = log['Ship']

                # parse fuel
                if 'FuelLevel' in log and ship_status['type'] != 'TestBuggy':
                    ship_status['fuel_level'] = log['FuelLevel']
                if 'FuelCapacity' in log and ship_status['type'] != 'TestBuggy':
                    if type(log['FuelCapacity']) == float:
                        ship_status['fuel_capacity'] = log['FuelCapacity']
                    else:
                        ship_status['fuel_capacity'] = log['FuelCapacity'][
                            'Main']

                if log_event == 'FuelScoop' and 'Total' in log:
                    ship_status['fuel_level'] = log['Total']
                if ship_status['fuel_level'] and ship_status['fuel_capacity']:
                    ship_status['fuel_percent'] = round(
                        (ship_status['fuel_level'] /
                         ship_status['fuel_capacity']) * 100)
                else:
                    ship_status['fuel_percent'] = 10

                # parse scoop
                if log_event == 'FuelScoop' and ship_status[
                        'time'] < 10 and ship_status['fuel_percent'] < 100:
                    ship_status['is_scooping'] = True
                else:
                    ship_status['is_scooping'] = False

                # parse location
                if (log_event == 'Location'
                        or log_event == 'FSDJump') and 'StarSystem' in log:
                    ship_status['location'] = log['StarSystem']

                # parse target
                if log_event == 'FSDTarget':
                    if log['Name'] == ship_status['location']:
                        ship_status['target'] = None
                    else:
                        ship_status['target'] = log['Name']
                elif log_event == 'FSDJump':
                    if ship_status['location'] == ship_status['target']:
                        ship_status['target'] = None

            # exceptions
            except Exception as trace:
                logger.exception("Exception occurred: {}".format(trace))
    #     logger.debug('ship='+str(ship))
    return ship_status
Example #34
 def handle_exception(error):
     stream_logger.exception(error)
     if not app.debug:
         logger.exception(error)
     response = {'message': error.message, 'success': False}
     return response, error.status
Example #35
    def __init__(self, input_file, heiplas_samples):
        counter = 0
        samples = []

        #load HeiPLAS ds
        adj_noun_to_attr = {}
        attr_set = set()
        for sample in heiplas_samples:
            adj_noun_to_attr[(sample.adj, sample.noun)] = sample.attr
            attr_set.add(sample.attr)

        #load adj_noun_count samples
        with open(input_file) as f:
            for row in f:
                try:
                    counter += 1
                    if counter % PRINT_EVERY == 0:
                        logger.info(
                            "processing {} row for AdjNounAttrDs".format(
                                counter))
                        if DEBUG_MODE:
                            logger.info("debug mode - stop loading data")
                            break
                    row_data = row.rstrip('\n\r').split("\t")
                    sample = Sample(row_data)
                    # if the sample exists in HeiPLAS it already has an attribute - update it
                    if adj_noun_to_attr.has_key(
                        (sample.adj.word, sample.noun.word)):
                        logger.debug("updating attribute for ({},{})".format(
                            sample.adj.word, sample.noun.word))
                        sample.update_attr(
                            adj_noun_to_attr[(sample.adj.word,
                                              sample.noun.word)])
                        del adj_noun_to_attr[(sample.adj.word,
                                              sample.noun.word)]
                    samples.append(sample)
                except:
                    logger.exception(
                        "exception while processing row:[{}]".format(row))

        logger.info("Number of samples: [{}]".format(len(samples)))
        logger.info("Going to add {} samples from Heiplas".format(
            len(adj_noun_to_attr)))

        # iterate over remaining items in the heiplas dict to add them to the dataset
        for (adj, noun), attribute in adj_noun_to_attr.iteritems():
            sample = Sample((adj, noun, 1), attr=attribute)
            samples.append(sample)
            # logger.debug("adding heiplas sample: {} {} {}".format(adj,noun,attribute.upper()))

        if SAMPLES_ABOVE_THRESHOLD_FLAG:
            logger.info("filtering out samples with count < [{}]".format(
                SAMPLE_COUNT_THRESHOLD))
            self.__samples = set([
                sample for sample in samples
                if sample.count >= SAMPLE_COUNT_THRESHOLD
            ])
        else:
            self.__samples = set(samples)
        logger.info("Number of unique samples : [{}]".format(
            len(self.__samples)))

        self.__samples = list(self.__samples)

        # set samples that carry an attribute label to the maximum sampling weight
        max_weight = max([samp.weight for samp in self.__samples])
        for samp in self.__samples:
            if samp.attr.word != UNKNOWN_WORD:
                samp.weight = max_weight
Example #36
def user_register():
    """
    Register a user
    ---
    post:
        summary: User registration endpoint.
        description: Register a new user.
        parameters:
            -   in: formData
                name: email
                description: an email of the user
                required: true
                type: string
            -   in: formData
                name: name
                description: a name of the user
                required: true
                type: string
            -   in: formData
                name: surname
                description: a surname of the user
                required: true
                type: string
            -   in: formData
                name: password
                required: true
                description: a password of the user
                type: string
        responses:
            400:
                description: Parameters are not correct
            409:
                description: User with the email already exists
            200:
                description: User was registered
    """
    try:
        data = flask.request.json if flask.request.json else flask.request.form
        email: str = data.get(EMAIL_KEY)
        password: str = data.get(PASSWORD_KEY)
        name: str = data.get(NAME_KEY)
        surname: str = data.get(SURNAME_KEY)

        if not (email and password and name and surname):
            return make_response(
                jsonify({MESSAGE_KEY: 'Not enough data provided'}),
                HTTPStatus.BAD_REQUEST)

        existing_user = User.objects(email=email).first()
        existing_user = existing_user if existing_user else User.objects(
            email=email.lower()).first()
        email = email.lower()
        if existing_user:
            return make_response(jsonify({MESSAGE_KEY: 'User already exists'}),
                                 HTTPStatus.CONFLICT)

        user = User(email=email,
                    password=_hash_password(password),
                    name=name,
                    surname=surname)

        key = RSA.generate(RSA_MODULO, Random.new().read)
        private_key, public_key = key, key.publickey()
        private_key_h = private_key.export_key(format="PEM",
                                               pkcs=8,
                                               passphrase=password)

        user.public_key = base64.b64encode(
            public_key.export_key()).decode('utf-8')
        user.private_key_h = private_key_h.decode("utf-8")

        if not user:
            return make_response(
                jsonify({MESSAGE_KEY: 'Failed to create user'}),
                HTTPStatus.INTERNAL_SERVER_ERROR)

        user.save()
        return make_response(jsonify({MESSAGE_KEY: 'Success'}), HTTPStatus.OK)
    except Exception as e:
        logger.exception(f'Failed to register user. Error {e}')
        return make_response(jsonify({MESSAGE_KEY: 'Something bad happened'}),
                             HTTPStatus.INTERNAL_SERVER_ERROR)
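A hedged usage sketch with Flask's test client; the '/register' route path and the form field names are assumptions, since the route decorator and the *_KEY constants are not shown above:

# assumes `app` is the Flask application that exposes user_register at /register
client = app.test_client()
resp = client.post('/register', data={
    'email': 'jane@example.com',
    'name': 'Jane',
    'surname': 'Doe',
    'password': 's3cret',
})
print(resp.status_code, resp.get_json())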
Example #37
    def get_articles(self):
        resultList = []

        pages = 3
        sections = [
            ("新聞", "https://www.storm.mg/articles"),
            ("評論", "https://www.storm.mg/all-comment"),
            ("財經", "https://www.storm.mg/category/23083"),
            ("生活", "https://www.storm.mg/category/104"),
            ("人物", "https://www.storm.mg/category/171151"),
            ("華爾街日報", "https://www.storm.mg/category/173479"),
            ("新新聞", "https://www.storm.mg/category/87726"),
        ]

        try:
            for (title, url) in sections:
                resultList.append(self.create_section(title))
                for page in range(1, pages + 1):
                    # for each section, insert a title...
                    # ... then parse the page and extract article links
                    doc = html.document_fromstring(
                        read_http_page(url + "/" + str(page)))

                    # get the first featured article
                    topic = doc.xpath(
                        '//div[contains(@class, "category_top_card")]/div[contains(@class, "card_img_wrapper")]'
                    )
                    if topic:
                        title = topic[0].xpath(
                            'div[contains(@class, "card_inner_wrapper")]/a[contains(@class, "link_title")]'
                        )
                        intro = topic[0].xpath(
                            'div[contains(@class, "card_inner_wrapper")]/a[contains(@class, "card_substance")]'
                        )
                        title_text = title[0].xpath(
                            "h2/text()") if title else None
                        if title and title_text and title[0].get("href"):
                            resultList.append(
                                self.create_article(
                                    title_text[0].strip(),
                                    title[0].get("href"),
                                    intro[0].text.strip()
                                    if intro and intro[0].text else None,
                                ))

                    for topic in doc.xpath(
                            '//div[contains(@class, "category_cards_wrapper")]/div[contains(@class, "category_card")]'
                    ):
                        title = topic.xpath(
                            'div[contains(@class, "card_inner_wrapper")]/a[contains(@class, "link_title")]'
                        )
                        intro = topic.xpath(
                            'div[contains(@class, "card_inner_wrapper")]/a[contains(@class, "card_substance")]'
                        )
                        title_text = title[0].xpath(
                            "h3/text()") if title else None

                        if title and title_text and title[0].get("href"):
                            resultList.append(
                                self.create_article(
                                    title_text[0].strip(),
                                    title[0].get("href"),
                                    intro[0].text.strip()
                                    if intro and intro[0].text else None,
                                ))

        except Exception as e:
            logger.exception("Problem processing url: " + str(e))
            logger.exception(
                traceback.format_exception(etype=type(e),
                                           value=e,
                                           tb=e.__traceback__))

        return resultList
Example #38
    def get_articles(self):
        siteBaseUrl = "https://money.udn.com"
        baseUrl = siteBaseUrl + "/money/cate/"

        resultList = []
        sections = [
            ("要聞", baseUrl + "10846"),
            ("國際", baseUrl + "5588"),
            ("兩岸", baseUrl + "5589"),
            ("產業", baseUrl + "5591"),
            ("證券", baseUrl + "5590"),
            ("金融", baseUrl + "12017"),
            ("期貨", baseUrl + "11111"),
            ("理財", baseUrl + "5592"),
            ("房市", baseUrl + "5593"),
            ("專欄", baseUrl + "5595"),
            ("商情", baseUrl + "5597"),
        ]

        try:
            for (title, url) in sections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = html.document_fromstring(read_http_page(url))
                for topic in doc.xpath(
                        '//section[contains(@class, "cate-main__section")]/div[contains(@class, "story-headline-wrapper")]'
                ):
                    # main stories first...
                    link = topic.xpath(
                        'div[contains(@class, "story__content")]/a')
                    title = topic.xpath(
                        'div[contains(@class, "story__content")]/a/h3')
                    intro = topic.xpath(
                        'div[contains(@class, "story__content")]/a/p')
                    title_text = title[0].text if title else None

                    if title and title_text and link:
                        resultList.append(
                            self.create_article(
                                title_text.strip(),
                                siteBaseUrl + link[0].get("href"),
                                intro[0].text.strip()
                                if intro and intro[0].text else None,
                            ))

                for topic in doc.xpath(
                        '//section[contains(@class, "cate-main__section")]/ul[contains(@class, "story-flex-bt-wrapper")]'
                ):
                    # ... then other stories
                    titles = topic.xpath(
                        'li[contains(@class, "story__item")]/a')
                    for title in titles:
                        title_text = title.text
                        if title_text:
                            resultList.append(
                                self.create_article(
                                    title_text.strip(),
                                    siteBaseUrl + title.get("href"),
                                    None,
                                ))

        except Exception as e:
            logger.exception("Problem processing url: " + str(e))
            logger.exception(
                traceback.format_exception(etype=type(e),
                                           value=e,
                                           tb=e.__traceback__))

        return resultList
Example #39
def transition_next_state(update, context):
    # get current and next states
    # current is next state of previous state
    prev_state = States.get_prev_state(update, context)  # type: State
    current_state = States.get_current_state(update, context)  # type: State

    # check if it is a question state
    if current_state is not None and current_state.state_type == 'question':
        try:
            # if yes, store answer
            current_state.run_save_param(update, context)
        except TryAgainError:
            # if answer is not correct
            current_state.run_error_state(update, context)
            set_pending_reply(update, context, True)
            return False

    def print_if_not_none(prefix, state):
        if state is not None:
            logger.info(f'{prefix} {state.state}')
        else:
            logger.info(f'{prefix} {None}')

    print_if_not_none('prev', prev_state)
    print_if_not_none('curr', current_state)

    try:
        # if state_type == 'action', get_next_state will perform_action and determine what is its next state
        # e.g. if_else will choose 0 or 1 depending on if else results
        next_state = current_state.get_next_state(update, context)
    except UnknownError as e:  # this is usually due to server error
        # if perform_action raise error, catch here
        logger.exception(e)
        if current_state.has_error_state(
        ):  # check if user has defined error state
            current_state.run_error_state(update, context)
        else:
            current_state.run_server_error_state(update,
                                                 context)  # send server error

        set_pending_reply(update, context, True)
        return False

    # if no more next state, indicating the flow has ended, return True
    if next_state is None:
        if prev_state is not None:
            logger.info(
                f'Ended curr state {current_state.state} from: {prev_state.state}'
            )
        else:
            logger.info(f'Ended curr state {current_state.state}')
        return True

    # run_state here is to send message (and "action" will have no effect)
    next_state.run_state(update, context)
    print_if_not_none('next', next_state)

    context.user_data.write('current_state', current_state.state)
    context.user_data.write('next_state', next_state.state)
    set_pending_reply(update, context, next_state.state_type == 'question')

    # if it is not a question state, then just go to next state
    if next_state.state_type in ['command', 'action', 'message']:
        return transition_next_state(update, context)

    return True
Example #40
# -*- coding:utf-8 -*-

import logging
import os

from logger import logger


def fun():
    pass


try:
    fun(1)
except:
    logger.exception('Error Message')

print(11)
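Example #40 above is the minimal pattern: logger.exception() logs at ERROR level and appends the active traceback, so it should only be called from inside an except block. A self-contained equivalent using error(..., exc_info=True):

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

try:
    int('not a number')
except ValueError:
    # same output as logger.exception('Error Message')
    logger.error('Error Message', exc_info=True)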
Example #41
    def update_metadata(self, slug, tmdb_id):
        if slug not in self.films:
            logger.warning('Could not update "%s", not in watchlist' % (slug))
            return None

        # Get metadata
        logger.info('Getting metadata for "%s" using TMDb id=%s' %
                    (slug, tmdb_id))

        details = self.tmdb.details(tmdb_id)

        if not details or details.get('status_code'):
            raise Exception('No metadata found for %s' % (slug))

        # Parse TMDb details
        try:
            # Details
            year = details.get('release_date')
            year = int(year.split('-')[0]) if year else None
            credits = details.get('credits', {})
            crew = credits.get('crew', [])

            metadata = {
                'title':
                details.get('title'),
                'original_title':
                details.get('original_title'),
                'year':
                year,
                'overview':
                details.get('overview'),
                'genres': [g['name'] for g in details.get('genres', [])],
                'runtime':
                details.get('runtime'),
                'original_language':
                details.get('original_language'),
                'spoken_languages':
                [l['name'] for l in details.get('spoken_languages', [])],
                'directors':
                [p['name'] for p in crew if p['job'] == 'Director'],
                'writers': [p['name'] for p in crew if p['job'] == 'Writer'],
            }

            # Images
            if details.get('backdrop_path') and not path.isfile(
                    path.join(BACKDROPS_PATH, '%s.jpg' % (slug))):
                try:
                    backdrop_url = TMDB_BACKDROP_URL % (
                        details.get('backdrop_path'))

                    logger.info('Fetching backdrop for "%s", url=%s' %
                                (slug, backdrop_url))

                    r = get(backdrop_url, stream=True)
                    r.raise_for_status()

                    with open(path.join(BACKDROPS_PATH, '%s.jpg' % (slug)),
                              'wb') as f:
                        r.raw.decode_content = True
                        copyfileobj(r.raw, f)
                except:
                    logger.exception('Could not save backdrop image')
            else:
                logger.warning('No backdrop found for "%s"' % (slug))
        except:
            logger.exception('TMDb parse error')
            raise Exception('Could not parse metadata for %s' % (slug))

        # Update film
        logger.info('Updating metadata for "%s"' % (slug))

        self.films[slug]['ids']['tmdb'] = details.get('id')
        self.films[slug]['ids']['imdb'] = details.get('imdb_id')
        self.films[slug]['metadata'] = metadata
        self.save()

        return metadata
Example #42
def summarize_pom_file(full_path):
    root = None
    try:
        root = ElementTree.parse(full_path).getroot()
    except Exception as error:
        logger.exception("file :{}".format(full_path))

    if root is None:
        return
    tagStr = root.tag
    namespace = ''
    if '}' in tagStr:
        namespace = tagStr[0:tagStr.index('}') + 1]

    property_values = {}

    project_version = root.find(namespace + 'version')

    if project_version is not None:
        property_values['project.version'] = project_version.text
    else:
        project_version = root.find(namespace + 'parent/' + namespace +
                                    'version')
        if project_version is not None:
            property_values['project.version'] = project_version.text

    properties = root.find(namespace + 'properties')

    # if 'com.google.truth__truth/0.41' in full_path:
    #   import pdb; pdb.set_trace()

    if properties is not None:
        for _property in properties:
            property_name = _property.tag.replace(namespace, '')
            property_value = _property.text
            property_values[property_name] = property_value

    dependencies = root.findall(namespace + 'dependencies/' + namespace +
                                'dependency')
    pom_data = {'libs': [], 'deps': []}
    if len(dependencies) > 0:
        groupElement = root.find(namespace + 'groupId')
        if groupElement == None:
            groupIdElement = root.find(namespace + 'parent/' + namespace +
                                       'groupId')
            if groupIdElement != None:
                group_id1 = groupIdElement.text
            else:
                #     # logger.error("group id 1 not found: {}".format(full_path))
                return
        else:
            group_id1 = groupElement.text

        versionElement = root.find(namespace + 'version')
        if versionElement == None:
            versionIdElement = root.find(namespace + 'parent/' + namespace +
                                         'version')
            if versionIdElement != None:
                version1 = versionIdElement.text
            else:
                # logger.error("version 1 not found: {}".format(full_path))
                return

        else:
            version1 = versionElement.text
        version1 = get_version_from_property(version1, property_values)
        if '${' in version1:
            # logger.error("can not extract version1 :{} in {}".format(version1,full_path))
            return
        artifactElement = root.find(namespace + 'artifactId')
        if artifactElement == None:
            artifactIdElement = root.find(namespace + 'parent/' + namespace +
                                          'artifactId')
            if artifactIdElement != None:
                artifact_id1 = artifactIdElement.text
            else:
                # logger.error("artifact id 1 id not found: {}".format(full_path))
                return
        else:
            artifact_id1 = artifactElement.text

        if '{' in group_id1 or '{' in artifact_id1:
            # logger.error(" group_id or artifact id contain $ {}".format(artifact_id1))
            return

        nameElement = root.find(namespace + 'name')
        lib_name1 = None

        if nameElement != None:
            lib_name1 = nameElement.text
        key = group_id1 + "__" + artifact_id1
        if key not in pom_data['libs']:
            pom_data['libs'].append(key)

        for dependency in dependencies:
            groupIdElement = dependency.find(namespace + 'groupId')
            if groupIdElement == None:
                # logger.error("Group id not found: {}".format(full_path))
                continue
            else:
                groupId2 = groupIdElement.text
                artifactIdELement = dependency.find(namespace + 'artifactId')
                if artifactIdELement != None:
                    artifactId2 = artifactIdELement.text
                else:
                    # logger.error("artifactId2 not found: {}".format(full_path))
                    continue

                versionElement = dependency.find(namespace + 'version')
                version2 = None
                if '{' in groupId2 or '{' in artifactId2:
                    # logger.error("Group id or artifact_id contain $ {}".format(artifactId2))
                    continue

                if versionElement != None:
                    version2 = dependency.find(namespace + 'version').text

                    key2 = groupId2 + "__" + artifactId2
                    if key2 not in pom_data['libs']:
                        pom_data['libs'].append(key2)
                # else:
                # logger.error("version 2 not found: {}".format(full_path))
                # # scope = dependency.find(namespace +'scope').text
                if version2:
                    version2 = get_version_from_property(
                        version2, property_values)
                    if '${' in version2:
                        # logger.error("can not extract version2 :{} in {}".format(version2,full_path))
                        continue
                    lib1 = key + "__" + version1
                    lib2 = key2 + "__" + version2
                    dep_key = "{}>{}".format(lib1, lib2)
                    if dep_key in pom_data['deps']:
                        pom_data['deps'] = [dep_key]
                    else:
                        pom_data['deps'].append(dep_key)

    lib_summary_file = full_path.replace('.pom', '.json')
    with open(lib_summary_file, 'w') as json_summary:
        json.dump(pom_data, json_summary)
    logger.info("write dom summary to : {}".format(lib_summary_file))
Example #43
0
                res = create_order(bitflyer,
                                   side="sell",
                                   amount=trade_amount,
                                   price=ticker_ask)
                order_id = res["id"]

                sleep(1)

                # Get all open (unsettled) positions
                open_orders = get_open_orders(bitflyer)

                # Cancel because the order was not taken
                if len(open_orders) == 1:
                    cancel_order(bitflyer, order_id)
                    logger.info("Can not sell order")

                # If the order was taken
                else:
                    update_status_close(session, trade_history, order_id)
                    order_flg = False
                    logger.info("Close position")

    # Catch the exception and log it
    except Exception as e:
        logger.exception(e)

    ### Delete the file because the process is terminating
    os.remove("process.txt")
    logger.info("=== trade_batch finish ===")
Example #44
0
    def get_articles(self):
        resultList = []
        sections = [
            ('Vancouver',
             'http://www.theprovince.com/scripts/Sp6Query.aspx?catalog=VAPR&tags=category|news|subcategory|metro%20vancouver'
             ),
            ('Fraser Valley',
             'http://www.theprovince.com/scripts/Sp6Query.aspx?catalog=VAPR&tags=category|news|subcategory|fraser%20valley'
             ),
            ('B.C.',
             'http://www.theprovince.com/scripts/Sp6Query.aspx?catalog=VAPR&tags=category|news|subcategory|b.c.'
             ),
        ]
        relSections = [
            ('Canada', 'http://www.theprovince.com/7588609.atom'),
            ('World', 'http://www.theprovince.com/7589147.atom'),
        ]

        try:
            for (title, url) in sections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = etree.fromstring(read_http_page(url))
                for entry in doc.xpath(
                        '//ns:entry[@Status="FREE"]',
                        namespaces={'ns': 'http://www.w3.org/2005/Atom'}):
                    title = entry.xpath(
                        'ns:title[@type="html"]',
                        namespaces={'ns':
                                    'http://www.w3.org/2005/Atom'})[0].text
                    link = 'http://www.theprovince.com' + entry.xpath(
                        'ns:link[@type="text/html"]',
                        namespaces={'ns': 'http://www.w3.org/2005/Atom'
                                    })[0].get('href')
                    abstract = entry.xpath('ns:link[@type="text/html"]',
                                           namespaces={
                                               'ns':
                                               'http://www.w3.org/2005/Atom'
                                           })[0].get('Abstract')
                    resultList.append(
                        self.create_article(title.strip(), link, abstract))

            for (title, url) in relSections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = etree.fromstring(read_http_page(url))
                for entry in doc.xpath(
                        '//ns:entry[@Status="FREE"]',
                        namespaces={'ns': 'http://www.w3.org/2005/Atom'}):
                    title = entry.xpath(
                        'ns:title[@type="html"]',
                        namespaces={'ns':
                                    'http://www.w3.org/2005/Atom'})[0].text
                    link = 'http://www.theprovince.com' + entry.xpath(
                        'ns:link[@type="text/xml"]',
                        namespaces={'ns': 'http://www.w3.org/2005/Atom'
                                    })[0].get('href')
                    abstract = entry.xpath('ns:link[@type="text/xml"]',
                                           namespaces={
                                               'ns':
                                               'http://www.w3.org/2005/Atom'
                                           })[0].get('Abstract')
                    resultList.append(
                        self.create_article(title.strip(), link, abstract))

        except Exception as e:
            logger.exception('Problem processing url: ' + str(e))
            logger.exception(
                traceback.format_exception(etype=type(e),
                                           value=e,
                                           tb=e.__traceback__))

        return resultList
Example #45
0
    def get_articles(self):
        resultList = []
        sections = [
            ('要聞',
             'https://www.singtao.ca/category/52-%E5%8D%A1%E5%8A%A0%E5%88%A9%E8%A6%81%E8%81%9E/?variant=zh-hk'
             ),
            ('加國新聞',
             'https://www.singtao.ca/category/54-%E5%8D%A1%E5%8A%A0%E5%88%A9%E5%8A%A0%E5%9C%8B/?variant=zh-hk'
             ),
            ('省市',
             'https://www.singtao.ca/category/65-%E5%8D%A1%E5%8A%A0%E5%88%A9%E7%9C%81%E5%B8%82/?variant=zh-hk'
             ),
            ('港聞',
             'https://www.singtao.ca/category/57-%E5%8D%A1%E5%8A%A0%E5%88%A9%E6%B8%AF%E8%81%9E/?variant=zh-hk'
             ),
            ('國際',
             'https://www.singtao.ca/category/56-%E5%8D%A1%E5%8A%A0%E5%88%A9%E5%9C%8B%E9%9A%9B/?variant=zh-hk'
             ),
            ('中國',
             'https://www.singtao.ca/category/58-%E5%8D%A1%E5%8A%A0%E5%88%A9%E4%B8%AD%E5%9C%8B/?variant=zh-hk'
             ),
            ('台灣',
             'https://www.singtao.ca/category/59-%E5%8D%A1%E5%8A%A0%E5%88%A9%E5%8F%B0%E7%81%A3/?variant=zh-hk'
             ),
            ('財經',
             'https://www.singtao.ca/category/61-%E5%8D%A1%E5%8A%A0%E5%88%A9%E8%B2%A1%E7%B6%93/?variant=zh-hk'
             ),
            ('體育',
             'https://www.singtao.ca/category/60-%E5%8D%A1%E5%8A%A0%E5%88%A9%E9%AB%94%E8%82%B2/?variant=zh-hk'
             ),
            ('娛樂',
             'https://www.singtao.ca/category/62-%E5%8D%A1%E5%8A%A0%E5%88%A9%E5%A8%9B%E6%A8%82/?variant=zh-hk'
             ),
        ]

        try:
            for (title, url) in sections:
                # for each section, insert a title...
                resultList.append(self.create_section(title))
                # ... then parse the page and extract article links
                doc = html.document_fromstring(
                    read_http_page(url, {
                        'edition': 'calgary'
                    }).decode('utf-8'))

                # top story
                top_story_link = doc.xpath(
                    '(//div[@class="td-ss-main-content"])[1]/div[@class="cat-header-image"]/a'
                )
                top_story_text = doc.xpath(
                    '(//div[@class="td-ss-main-content"])[1]/div[@class="cat-header-image"]/a/div/h3'
                )
                if top_story_link and top_story_text:
                    resultList.append(
                        self.create_article(top_story_text[0].text.strip(),
                                            top_story_link[0].get('href')))

                for topic in doc.xpath(
                        '(//div[@class="td-ss-main-content"])[1]/div[contains(@class, "td-animation-stack")]/div[@class="item-details"]/h3/a'
                ):
                    if topic.text and topic.get('href'):
                        resultList.append(
                            self.create_article(topic.text.strip(),
                                                topic.get('href')))

        except Exception as e:
            logger.exception('Problem processing url: ' + str(e))
            logger.exception(
                traceback.format_exception(etype=type(e),
                                           value=e,
                                           tb=e.__traceback__))

        return resultList
Example #46
0
    MAX_PENDING_GUILDS = int(os.getenv('MAX_PENDING_GUILDS', 5))
    GAME_LINK = os.getenv("GAME_LINK", "")
    _TOKEN = os.getenv('DISCORD_TOKEN')
    TOKEN_SITE = os.getenv('TOKEN_SITE')
    SONG_PATH = os.getenv("SONG_PATH", "files/_songs/")
    VERBOSE = int(os.getenv("VERBOSE", 20)
                  or 20)  # 0: no message, 10: few messages, 20: verbose
    CLIENT_ID = int(os.getenv("CLIENT_ID", None))
    PASSWORD_BOT_INVITE = os.getenv("PASSWORD_BOT_INVITE", None)
    PASSWORD_REMOVE_BOT = os.getenv("PASSWORD_REMOVE_BOT", None)
    PASSWORD_KICK_BOT = os.getenv("PASSWORD_KICK_BOT", None)
    CARD_BASE_URL = os.getenv("CARD_BASE_URL", "")  # card urls for Dixit game
except (KeyError, ValueError) as err:
    logger.error(
        "Failed to load environment variables. Program will terminate.")
    logger.exception(err)
    exit(1)
except AssertionError as err:
    logger.exception(err)
    exit(1)
else:
    if MAX_GUILDS < 1:
        logger.error(
            f"Maximum number of guilds set ({MAX_GUILDS}) is invalid; it must be strictly positive. "
            f"Program will terminate.")
        exit(2)
    if MAX_GUILDS > 1:
        logger.error(f"More than one guild is not supported yet!")
        exit(3)
    logger.info(f"Environment variables:\nGAME_LANGUAGE: {GAME_LANGUAGE}"
                f"\nVERBOSE: {VERBOSE}\nDEBUG_MODE: {DEBUG_MODE}")
Example #47
0
def new_password():
    """
    Change user's password
    ---
    post:
        summary: Password changing endpoint.
        description: Change user's password.
        parameters:
            -   in: formData
                name: old_password
                description: old user's password
                required: true
                type: string
            -   in: formData
                name: new_password
                description: a new password
                required: true
                type: string
        responses:
            400:
                description: Parameters are not correct
            401:
                description: Wrong current password
            406:
                description: New password is the same as an old one
            200:
                description: The password has been changed
    """
    try:
        data = flask.request.json if flask.request.json else flask.request.form
        old_password: str = data.get(OLD_PASSWORD_KEY)
        new_password: str = data.get(NEW_PASSWORD_KEY)

        if not (old_password and new_password):
            return make_response(
                jsonify({MESSAGE_KEY: 'Not enough data provided'}),
                HTTPStatus.BAD_REQUEST)

        if not _check_password(old_password, current_user.password):
            return make_response(
                jsonify({MESSAGE_KEY: 'Wrong current password'}),
                HTTPStatus.UNAUTHORIZED)

        if old_password == new_password:
            return make_response(
                jsonify(
                    {MESSAGE_KEY: 'New password is the same as an old one'}),
                HTTPStatus.NOT_ACCEPTABLE)

        try:
            private_key = RSA.importKey(extern_key=current_user.private_key_h,
                                        passphrase=old_password)
        except Exception as e:
            logger.exception(f'Failed to change password. Error {e}')
            return make_response(
                jsonify({
                    MESSAGE_KEY:
                    'Provided password failed to decrypt user data'
                }), HTTPStatus.UNAUTHORIZED)

        private_key_h = private_key.export_key(format="PEM",
                                               pkcs=8,
                                               passphrase=new_password)
        current_user.private_key_h = private_key_h.decode("utf-8")
        current_user.password = str(_hash_password(new_password))[2:-1]
        print(current_user.password)
        current_user.save()

        response = make_response(
            jsonify({
                MESSAGE_KEY: 'Success!',
                PRIVATE_KEY_H: current_user.private_key_h
            }), HTTPStatus.OK)
        response.set_cookie(PRIVATE_KEY_H, current_user.private_key_h)
        return response
    except Exception as e:
        logger.exception(f'Failed to change password. Error {e}')
        return make_response(jsonify({MESSAGE_KEY: 'Something bad happened'}),
                             HTTPStatus.INTERNAL_SERVER_ERROR)
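# A hypothetical client-side call against the endpoint above, matching the
# form fields described in its docstring. The route path, host/port and the
# session cookie are assumptions; only the field names come from the code.
import requests

resp = requests.post(
    'http://localhost:5000/new_password',  # assumed route and host
    data={'old_password': 'old-secret', 'new_password': 'new-secret'},
    cookies={'session': 'an-authenticated-session-cookie'},
)
print(resp.status_code, resp.json())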
Example #48
0
def handler(method, host, path, headers, body, wfile, timeout=60):
    time_request = time.time()

    if "Connection" in headers and headers["Connection"] == "close":
        del headers["Connection"]

    errors = []
    while True:
        time_left = time_request + timeout - time.time()
        if time_left <= 0:
            return_fail_message(wfile)
            return "ok"

        try:
            response = direct_front.request(method,
                                            host,
                                            path,
                                            headers,
                                            body,
                                            timeout=time_left)
            if response:
                if response.status > 600:
                    logger.warn("direct %s %s % status:%d", method, host, path,
                                response.status)
                    continue
                elif response.status > 400:
                    server_type = response.headers.get('Server', "")

                    if "G" not in server_type and "g" not in server_type and server_type not in google_server_types:

                        logger.warn(
                            "IP:%s host:%s not support GAE, server type:%s status:%d",
                            response.ssl_sock.ip, host, server_type,
                            response.status)
                        direct_front.ip_manager.report_connect_fail(
                            response.ssl_sock.ip, force_remove=True)
                        response.worker.close()
                        continue
                break
        except OpenSSL.SSL.SysCallError as e:
            errors.append(e)
            logger.warn("direct_handler.handler err:%r %s/%s", e, host, path)
        except Exception as e:
            errors.append(e)
            logger.exception('direct_handler.handler %r %s %s , retry...', e,
                             host, path)

    response_headers = {}
    for key, value in response.headers.items():
        key = key.title()
        response_headers[key] = value

    response_headers["Persist"] = ""
    response_headers["Connection"] = "Persist"

    try:
        wfile.write("HTTP/1.1 %d %s\r\n" % (response.status, response.reason))
        for key, value in response_headers.items():
            send_header(wfile, key, value)
        wfile.write("\r\n")

        length = 0
        while True:
            data = response.task.read()
            data_len = len(data)
            length += data_len
            if 'Transfer-Encoding' in response_headers:
                if not data_len:
                    wfile.write('0\r\n\r\n')
                    break
                wfile.write('%x\r\n' % data_len)
                wfile.write(data)
                wfile.write('\r\n')
            else:
                if not data_len:
                    break
                wfile.write(data)

        logger.info("DIRECT t:%d s:%d %d %s %s",
                    (time.time() - time_request) * 1000, length,
                    response.status, host, path)
        if 'Content-Length' in response_headers or 'Transfer-Encoding' in response_headers:
            return "ok"
    except NetWorkIOError as e:
        logger.warn('DIRECT %s %s%s except:%r', method, host, path, e)
        if e.args[0] not in (errno.ECONNABORTED, errno.ETIMEDOUT, errno.EPIPE):
            raise
    except Exception as e:
        logger.exception("DIRECT %s %d %s%s, t:%d send to client except:%r",
                         method, response.status, host, path,
                         (time.time() - time_request) * 1000, e)
Example #49
0
def login():
    """
    Login a user
    ---
    get:
        summary: Login endpoint.
        description: Login a user with email.
        parameters:
            -   in: formData
                name: email
                description: an email of the user
                required: true
                type: string
            -   in: formData
                name: password
                required: true
                description: a password of the user
                type: string
        responses:
            400:
                description: Parameters are not correct
            404:
                description: User was not found
            401:
                description: Credentials provided are incorrect
            200:
                description: User was logged in
    """
    try:
        data = flask.request.json if flask.request.json else flask.request.form
        email: str = data.get(EMAIL_KEY)
        password: str = data.get(PASSWORD_KEY)

        if not (email and password):
            return make_response(
                jsonify({MESSAGE_KEY: 'Not enough data provided'}),
                HTTPStatus.BAD_REQUEST)

        existing_user = User.objects(email=email.lower()).first()
        existing_user = existing_user if existing_user else User.objects(
            email=email).first()
        if not existing_user:
            return make_response(jsonify({MESSAGE_KEY: 'User not found'}),
                                 HTTPStatus.NOT_FOUND)
        if _check_password(password, existing_user.password):
            login_user(existing_user)

            response = make_response(
                jsonify({
                    MESSAGE_KEY:
                    'Success!',
                    TOKEN_KEY:
                    encode_auth_token(str(existing_user.id)).decode(),
                    PUBLIC_KEY:
                    existing_user.public_key,
                    PRIVATE_KEY_H:
                    existing_user.private_key_h
                }), HTTPStatus.OK)
            response.set_cookie(PUBLIC_KEY, existing_user.public_key)
            response.set_cookie(PRIVATE_KEY_H, existing_user.private_key_h)
            return response
        return make_response(jsonify({MESSAGE_KEY: 'Failed to authenticate'}),
                             HTTPStatus.UNAUTHORIZED)
    except Exception as e:
        logger.exception(f'Failed to login user. Error {e}')
        return make_response(jsonify({MESSAGE_KEY: 'Something bad happened'}),
                             HTTPStatus.INTERNAL_SERVER_ERROR)
Example #50
0
def sync():
    logger.info('Syncing podcast episodes')

    job = get_current_job()

    results = {}
    podcasts = Podcast.select()  #.limit(3)

    for i, podcast in enumerate(podcasts, 1):
        logger.info('Parsing feed, %s' % (podcast.name))

        error = None
        synced = 0
        ignored = 0

        try:
            episodes = parse_feed(get(podcast.feed_url).text)
        except Exception as e:
            logger.exception('Could not parse feed %s' % (podcast.feed_url))
            error = str(e)
            episodes = []

        for episode in episodes:
            job.meta['progress'] = ((i / float(len(podcasts))) * 100)
            job.save_meta()

            try:
                # Episode entity
                e = Episode.create(podcast=podcast, **episode)
                synced += 1

                # Document index
                clean_pattern = compile(r'\W+', UNICODE)
                title = sub(clean_pattern, ' ', e.title)
                description = sub(clean_pattern, ' ', e.description)

                EpisodeIndex.insert({
                    EpisodeIndex.rowid:
                    e.id,
                    EpisodeIndex.title:
                    title.lower(),
                    EpisodeIndex.description:
                    description.lower()
                }).execute()

            except IntegrityError:
                ignored += 1
            except:
                logger.exception('Could not add episode to database')

        logger.info('Done parsing %s, added %d episodes, ignored %d' %
                    (podcast.name, synced, ignored))

        results[podcast.id] = {
            'podcast_name': podcast.name,
            'error': error,
            'synced': synced,
            'ignored': ignored
        }

    return results
Example #51
0
            botmanager.execute(bot)
            # logger.debug(bot.__name__, 'MOVED', botmanager.execute(bot))
        schedule.run_pending()
        for _ in range(len(COMMAND_STACK)):
            item = COMMAND_STACK.pop(0)
            if item['cmd'] == 'RESET':
                hard = 'hard' in item['args']
                reset(hard=hard)
        time.sleep(1)


while True:
    while not server:
        try:
            server = qnet3.Server('0.0.0.0', 7777, Connector)
            break
        except OSError as e:
            logger.error(e)
            time.sleep(5)
    try:
        logger.debug('Server listening on port 7777.')
        main(server)
    except KeyboardInterrupt:
        server.shutdown()
        print("\nHave a good day!")
        exit()
    except:
        logger.exception(
            'The server encountered an internal error. Restarting')
        time.sleep(5)
Example #52
0
def crawler():
    # Establishing connection with mongodb
    client = pymongo.MongoClient(config["localhost"], config["port_num"])
    db = client[config['database_name']]
    col = db[config['collection_name']]

    # starting scraping
    if col.count_documents(
        {}) == 0:  # if collection is empty : scrape flinkhub.com
        links_list1 = []
        headers1 = {
            'User-Agent': config['user_agent'],
        }
        try:  # send request
            logger.debug("Making HTTP GET request: " + config['host_name'])
            r1 = requests.get(config['host_name'], headers=headers1)
            res1 = r1.text
            logger.debug("Got HTML source, content length = " + str(len(res1)))

        except:  # if cannot request url
            logger.exception("Failed to get HTML source from " +
                             config['host_name'])
            traceback.print_exc()
            return links_list1

        logger.debug("Extracting links from the HTML")
        soup1 = BeautifulSoup(
            res1, 'html.parser')  # converting request to soup object

        # saving html content to a .txt file
        try:
            file_name1 = ''.join(
                random.choices(string.ascii_uppercase + string.digits, k=16))
            file_name1 = file_name1 + '.txt'
            file_path1 = os.path.join(os.getcwd(), config["file_dir"],
                                      file_name1)
            text_file1 = open(file_path1, "w")
            n1 = text_file1.write(str(soup1))
            text_file1.close()
        except:
            logger.exception("Cannot write link in a file.")

        if 'Content-Length' in r1.headers:
            new_doc = {
                "link": config["host_name"],
                "source_link": None,
                "is_crawled": True,
                "last_crawl_date": datetime.datetime.utcnow(),
                "response_status": r1.status_code,
                "content_type": r1.headers['Content-Type'],
                "con_length": r1.headers['Content-Length'],
                "file_path": file_path1,
                "created_at": datetime.datetime.utcnow(),
            }
        else:
            new_doc = {
                "link": config["host_name"],
                "source_link": None,
                "is_crawled": True,
                "last_crawl_date": datetime.datetime.utcnow(),
                "response_status": r1.status_code,
                "content_type": r1.headers['Content-Type'],
                "con_length": len(r1.content),
                "file_path": file_path1,
                "created_at": datetime.datetime.utcnow(),
            }
        col.insert_one(new_doc)  # inserting original link to a document

        links1 = soup1.find_all("a")  # finding all a tags

        for link1 in links1:  # iterating over all the links
            temp1 = link1.get('href')  # getting url
            if temp1 not in links_list1:  # if link was not scraped in the same cycle
                links_list1.append(temp1)

                # checking validity of link
                temp_parse1 = urllib.parse.urlparse(temp1)
                netloc_bool1 = bool(temp_parse1.netloc)
                scheme_bool1 = bool(temp_parse1.scheme)
                if netloc_bool1:
                    if scheme_bool1:
                        # if link is valid and absolute url
                        actual_link1 = temp1
                        query1 = {"link": actual_link1}
                        myq1 = col.find(query1)
                        if myq1.count == 0:
                            temp_doc1 = {
                                "link": actual_link1,
                                "source_link": config['host_name'],
                                "is_crawled": False,
                                "last_crawl_date": None,
                                "response_status": None,
                                "content_type": None,
                                "con_length": None,
                                "file_path": None,
                                "created_at": datetime.datetime.utcnow()
                            }
                            col.insert_one(
                                temp_doc1)  # adding link to a document
                        else:
                            print(temp1 + " already exists in database."
                                  )  # if link already exists in database
                    else:
                        print(temp1 +
                              " link not valid")  # if link is not valid
                else:
                    # if link is a relative url
                    actual_link2 = urllib.parse.urljoin(
                        config['host_name'], temp1)
                    parsed2 = urllib.parse.urlparse(actual_link2)
                    netloc_bool2 = bool(parsed2.netloc)
                    scheme_bool2 = bool(parsed2.scheme)
                    if netloc_bool2 and scheme_bool2:  # if relative url is valid
                        query2 = {"link": actual_link2}
                        if col.count_documents(
                                query2
                        ) == 0:  # if link doesn't exist in collection already
                            temp_doc1 = {
                                "link": actual_link2,
                                "source_link": config['host_name'],
                                "is_crawled": False,
                                "last_crawl_date": None,
                                "response_status": None,
                                "content_type": None,
                                "con_length": None,
                                "file_path": None,
                                "created_at": datetime.datetime.utcnow()
                            }
                            col.insert_one(
                                temp_doc1)  # inserting link to collection
                        else:
                            print(str(actual_link2) + " already exists."
                                  )  # if link already exists in collection
                    else:
                        print(actual_link2 +
                              " not valid")  # if link is not valid
            else:
                print(temp1 + " Link already scraped"
                      )  # if link is already scraped in the same cycle
        return links1

    else:  # if there exist some links in the collection already
        if col.count_documents({
                "is_crawled": False
        }) > 0:  # if there exist some documents which are not crawled

            # picking a random link from the collection to be scraped
            num1 = col.count_documents({"is_crawled": False})
            random1 = math.floor(random.random() * num1)
            cursor_doc = col.find({"is_crawled": False}).limit(1).skip(random1)
            for curs in cursor_doc:
                doc = curs
            links_list2 = []
            og_link = doc['link']
            headers2 = {
                'User-Agent': config['user_agent'],
            }
            try:  # requesting link
                logger.debug("Making HTTP GET request: " + og_link)
                r2 = requests.get(og_link, headers=headers2)
                res2 = r2.text
                logger.debug("Got HTML source, content length = " +
                             str(len(res2)))
            except:
                logger.exception("Failed to get HTML source from " + og_link)
                traceback.print_exc()
                return links_list2

            logger.debug("Extracting links from the HTML")
            soup2 = BeautifulSoup(
                res2, 'html.parser')  # converting request to a soup object
            # saving html content to a file
            try:
                file_name2 = ''.join(
                    random.choices(string.ascii_uppercase + string.digits,
                                   k=16))
                file_name2 = file_name2 + '.txt'
                file_path_2 = os.path.join(os.getcwd(), config['file_dir'],
                                           file_name2)
                text_file2 = open(file_path_2, "w")
                n2 = text_file2.write(str(soup2))
                text_file2.close()
            except:
                logger.exception("Cannot write link in a file.")

            if 'Content-Length' in r2.headers:
                updated_doc = {
                    "is_crawled": True,
                    "last_crawl_date": datetime.datetime.utcnow(),
                    "response_status": r2.status_code,
                    "content_type": r2.headers['Content-Type'],
                    "con_length": r2.headers['Content-Length'],
                    "file_path": file_path_2,
                }
            else:
                updated_doc = {
                    "is_crawled": True,
                    "last_crawl_date": datetime.datetime.utcnow(),
                    "response_status": r2.status_code,
                    "content_type": r2.headers['Content-Type'],
                    "con_length": len(r2.content),
                    "file_path": file_path_2,
                }

            col.update_one(
                doc,
                {"$set": updated_doc})  # updating link which was just scraped

            links2 = soup2.find_all("a")  # converting request to a soup object
            for link2 in links2:
                temp2 = link2.get('href')  # getting link from a tag
                if temp2 not in links_list2:  # itertaing through links
                    links_list2.append(temp2)

                    # checking validity of links
                    temp_parse3 = urllib.parse.urlparse(temp2)
                    netloc_bool3 = bool(temp_parse3.netloc)
                    scheme_bool3 = bool(temp_parse3.scheme)
                    if netloc_bool3:
                        if scheme_bool3:
                            # valid absolute link
                            actual_link3 = temp2
                            query3 = {"link": actual_link3}
                            if col.count_documents(query3) == 0:
                                temp_doc = {
                                    "link": actual_link3,
                                    "source_link": og_link,
                                    "is_crawled": False,
                                    "last_crawl_date": None,
                                    "response_status": None,
                                    "content_type": None,
                                    "con_length": None,
                                    "file_path": None,
                                    "created_at": datetime.datetime.utcnow()
                                }
                                col.insert_one(
                                    temp_doc)  # adding link to the collection
                            else:
                                print(temp2 + " already exists."
                                      )  # if link already exists in collection
                        else:  # if link is not valid
                            print(temp2 + " link not valid")
                    else:
                        # link is a relative link
                        actual_link4 = urllib.parse.urljoin(og_link, temp2)
                        parsed4 = urllib.parse.urlparse(actual_link4)
                        netloc_bool4 = bool(parsed4.netloc)
                        scheme_bool4 = bool(parsed4.scheme)
                        if netloc_bool4 and scheme_bool4:
                            # valid relative link
                            query4 = {"link": actual_link4}
                            if col.count_documents(
                                    query4
                            ) == 0:  # checking for existence of link in collection
                                temp_doc = {
                                    "link": actual_link4,
                                    "source_link": og_link,
                                    "is_crawled": False,
                                    "last_crawl_date": None,
                                    "response_status": None,
                                    "content_type": None,
                                    "con_length": None,
                                    "file_path": None,
                                    "created_at": datetime.datetime.utcnow()
                                }
                                col.insert_one(
                                    temp_doc)  # adding link to the collection
                            else:
                                print(actual_link4 + " already exists."
                                      )  # link already exists in collection
                        else:
                            print(actual_link4 +
                                  " not valid")  # link is not valid
            return links2  # return list of links found

        else:  # if there are no links which are not crawled yet
            valid_docs = col.find({})
            # finding links which were not crawled in last 24 hours
            time_dif = datetime.timedelta(days=1)
            greater_than_24_docs = []
            for single_doc in valid_docs:
                last_crawl = single_doc["last_crawl_date"]
                if last_crawl and datetime.datetime.utcnow() - last_crawl > time_dif:
                    greater_than_24_docs.append(single_doc)
            num2 = len(greater_than_24_docs)
            # picking a random link out of those links which were not crawled in last 24 hours
            random2 = random.randint(0, num2 - 1)
            doc = greater_than_24_docs[random2]
            links_list2 = []
            og_link = doc['link']
            headers2 = {
                'User-Agent': config['user_agent'],
            }
            # making a https request
            try:
                logger.debug("Making HTTP GET request: " + og_link)
                r2 = requests.get(og_link, headers=headers2)
                res2 = r2.text
                logger.debug("Got HTML source, content length = " +
                             str(len(res2)))
            except:
                logger.exception("Failed to get HTML source from " + og_link)
                traceback.print_exc()
                return links_list2

            logger.debug("Extracting links from the HTML")
            soup2 = BeautifulSoup(
                res2, 'html.parser')  # turning request into soup object

            try:
                # writing html content to a txt file
                file_name2 = ''.join(
                    random.choices(string.ascii_uppercase + string.digits,
                                   k=16))
                file_name2 = file_name2 + '.txt'
                file_path2 = os.path.join(os.getcwd(), config['file_dir'],
                                          file_name2)
                text_file2 = open(file_path2, "w")
                n2 = text_file2.write(str(soup2))
                text_file2.close()
            except:
                logger.exception("Cannot write link in a file.")

            if 'Content-Length' in r2.headers:
                updated_doc = {
                    "is_crawled": True,
                    "last_crawl_date": datetime.date.today(),
                    "response_status": r2.status_code,
                    "content_type": r2.headers['Content-Type'],
                    "con_length": r2.headers['Content-Length'],
                    "file_path": file_path2,
                }

            else:

                updated_doc = {
                    "is_crawled": True,
                    "last_crawl_date": datetime.date.today(),
                    "response_status": r2.status_code,
                    "content_type": r2.headers['Content-Type'],
                    "con_length": len(r2.content),
                    "file_path": file_path2,
                }

            col.update_one(doc,
                           {"$set": updated_doc
                            })  # updating the recently crawled link document

            links2 = soup2.find_all("a")  # finding all anchor tags
            for link2 in links2:  # iterating through a tags
                temp2 = link2.get('href')  # geting the link from a tag
                if temp2 not in links_list2:  # if link wasn't found in this cycle
                    links_list2.append(temp2)
                    # checking for validity of link
                    temp_parse3 = urllib.parse.urlparse(temp2)
                    netloc_bool3 = bool(temp_parse3.netloc)
                    scheme_bool3 = bool(temp_parse3.scheme)
                    if netloc_bool3:
                        if scheme_bool3:
                            # link is absolute url and valid
                            actual_link3 = temp2
                            query3 = {"link": actual_link3}
                            if col.count_documents(query3) == 0:
                                temp_doc = {
                                    "link": actual_link3,
                                    "source_link": og_link,
                                    "is_crawled": False,
                                    "last_crawl_date": None,
                                    "response_status": None,
                                    "content_type": None,
                                    "con_length": None,
                                    "file_path": None,
                                    "created_at": datetime.datetime.utcnow()
                                }
                                col.insert_one(
                                    temp_doc)  # adding link to the collection
                            else:
                                print(
                                    temp2 + " already exists."
                                )  # if link already exists in the collection
                        else:
                            print(temp2 +
                                  " link not valid")  # link is not valid
                    else:
                        # link is a relative link
                        actual_link4 = urllib.parse.urljoin(og_link, temp2)
                        parsed4 = urllib.parse.urlparse(actual_link4)
                        netloc_bool4 = bool(parsed4.netloc)
                        scheme_bool4 = bool(parsed4.scheme)
                        if netloc_bool4 and scheme_bool4:
                            # link is relative and valid
                            query4 = {"link": actual_link4}
                            if col.count_documents(query4) == 0:
                                temp_doc = {
                                    "link": actual_link4,
                                    "source_link": og_link,
                                    "is_crawled": False,
                                    "last_crawl_date": None,
                                    "response_status": None,
                                    "content_type": None,
                                    "con_length": None,
                                    "file_path": None,
                                    "created_at": datetime.datetime.utcnow()
                                }
                                col.insert_one(
                                    temp_doc
                                )  # adding link document to collection
                            else:
                                print(actual_link4 + " already exists."
                                      )  # link already exists in collection
                        else:
                            print(actual_link4 +
                                  " not valid")  # link is not valid
            return links2  # return list of links found
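# The crawler above repeats the same absolute/relative URL validity check in
# several places. A small helper along these lines could keep that check in
# one place; the function name is illustrative, not from the original code.
import urllib.parse


def resolve_valid_link(href, base_url):
    """Return an absolute URL for href, or None if it cannot be used."""
    if not href:
        return None
    parsed = urllib.parse.urlparse(href)
    if not (parsed.netloc and parsed.scheme):
        # Relative link: resolve it against the page it was found on.
        href = urllib.parse.urljoin(base_url, href)
        parsed = urllib.parse.urlparse(href)
    return href if parsed.netloc and parsed.scheme else None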
Example #53
0
 def display(self, val, colon):
     try:
         self._display(val, colon)
     except Exception as e:
         logger.exception(e)
Example #54
0
    Messenger = Messenger(secrets, settings)
    Trader = Trader(secrets, settings)

    Trader.initialise()

    while True:
        try:
            Trader.analyse_pauses()
            Trader.analyse_buys()
            Trader.analyse_sells()
            time.sleep(10)

        except SSLError as exception:
            Messenger.print_exception_error("SSL")
            logger.exception(exception)
            time.sleep(10)
        except ConnectionError as exception:
            Messenger.print_exception_error("connection")
            logger.exception(exception)
            time.sleep(10)
        except json.decoder.JSONDecodeError as exception:
            Messenger.print_exception_error("JSONDecode")
            logger.exception(exception)
            time.sleep(10)
        except TypeError as exception:
            Messenger.print_exception_error("typeError")
            logger.exception(exception)
            time.sleep(10)
        except KeyError as exception:
            Messenger.print_exception_error("keyError", True)
Example #55
0
    get_user,
    iter_user_member_of,
    iter_group_member_of,
    iter_group_members,
)

# Read environment variables
domain_offset_file = environ.get('DOMAIN_OFFSET_FILE', None)

# Initialize domain offset map
domain_offset = {}
try:
    with open(domain_offset_file, 'r') as domain_offset_file_stream:
        domain_offset = safe_load(domain_offset_file_stream)
except Exception:
    logger.exception('Failed to read domain offset file')
defult_domain_offset = domain_offset.get('*', 0)


def add_domain_offset(domain_name, security_identifier):
    rid = int(security_identifier.split('-')[-1])
    return domain_offset.get(domain_name, defult_domain_offset) + rid


def cache_processed_member(func):
    id_cache = set()

    def wrapped(member):
        if member['id'] in id_cache:
            logger.info('Already processed {}, skip.'.format(
                member['displayName']))
Example #56
0
def get_top_movies():
    """
    This function scrapes top movies from IMDB top movies page
    :return: list of movie dicts
    """

    # This list will contain the final output i.e. list of movie dicts
    top_movies_list = []

    headers = {
        'User-Agent': config['user_agent'],
    }

    # Get the IMDB top movies page HTML
    try:
        logger.debug("Making HTTP GET request: " + config['top_movies_url'])
        r = requests.get(config['top_movies_url'], headers=headers)
        res = r.text
        logger.debug("Got HTML source, content length = " + str(len(res)))
    except:
        logger.exception("Failed to get HTML source from " +
                         config['top_movies_url'])
        traceback.print_exc()

        # Returns empty array as there was an error in getting HTML
        return top_movies_list

    logger.debug("Extracting top movies info from the HTML")

    # Use the HTML to create a soup object
    soup = BeautifulSoup(res, 'html.parser')

    # Extract the tbody with class='lister-list'
    tbody = soup.find('tbody', class_="lister-list")

    # Extract table rows
    trs = tbody.find_all('tr')

    # Loop over table rows to extract each movie info
    for row in trs:
        # This is the second column in the row
        td1 = row.find('td', class_="titleColumn")

        # The tag that contains the name of the movie
        a_tag = td1.find('a')

        # Extract movie name
        movie_name = a_tag.get_text()

        # Extract the IMDB link of this movie
        link = a_tag['href']

        # Extract the year
        year = td1.find('span', class_="secondaryInfo").get_text()

        # Year contains ( and ) symbols for ex. (2012)
        try:
            nums = re.findall(r'\d+', year)
            if len(nums) > 0:
                year = int(nums[0])
        except:
            # Ignore this error
            pass

        # Extract IMDB rating from the third column
        td2 = row.find('td', class_="imdbRating")
        rating = td2.find('strong').get_text()

        # Converting rating string into float
        try:
            rating = float(rating)
        except:
            # Rating is not a valid float number
            rating = 0.0

        # Put all the extract information into a dict
        movie = {
            'name': movie_name,
            'year': year,
            'rating': rating,
            'link': link,
        }

        # Append the dict to the list that will be returned
        top_movies_list.append(movie)

    # Once all table rows have been processed, return the list
    return top_movies_list
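# A hypothetical way to exercise this function from the command line; the
# module-level `config` and `logger` are assumed to be set up as above.
if __name__ == '__main__':
    for movie in get_top_movies()[:10]:
        print('%.1f  %s (%s)' % (movie['rating'], movie['name'], movie['year']))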
Example #57
0
def application(environ, start_response):
    try:
        # Create the fcs_status index
        fcs_status.create_status_index()
        # Create the fcs_audit index
        fcs_audit.create_audit_index()

        # fcs_status.id == fcs_audit.id is used to join the two indexes
        record_id = worker.get_id()
        # Record the startup status
        fcs_status.record_status(record_id)
        params = environ['QUERY_STRING']
        logger.debug("origin environ['QUERY_STRING']: %s", params)
        environ['QUERY_STRING'] = urllib.parse.unquote(params)
        logger.debug("unquote_params is %s", environ['QUERY_STRING'])
        global audit_records
        audit_records.append(
            fcs_audit.assemble_audit_record_with_index("铁笼启动", "参数: " + environ['QUERY_STRING'], record_id,
                                                       len(environ["QUERY_STRING"])))

        pl = Pool(10)  # create a pool of 10 worker processes; these same processes handle every task
        res_l = []

        if "CodeUri" in os.environ:
            codeUris = str(os.getenv('CodeUri')).split(',')
            if "ModelUri" in os.environ:
                codeUris.extend(str(os.getenv('ModelUri')).split(','))
            start_time = time.time()
            for code_url in codeUris:
                filename = code_url.split('=')[-1]
                logger.debug("开始下载%s", filename)
                try:

                    res = pl.apply_async(downloadFile,
                                         args=(code_url, filename, record_id))
                    data_size = res.get()[1]
                    audit_records.append(
                        fcs_audit.assemble_audit_record_with_index("加载模型", "下载模型和算法文件" + filename, record_id,
                                                                   int(data_size)))
                    res_l.append(res)

                except ConnectionRefusedError as e:
                    result = traceback.format_exc()
                    logger.exception("下载模型和算法文件异常")
                    except_str = str(e.__class__.__name__) + ": " + str(e)
                    if log_level == "DEBUG":
                        audit_records.append(
                            fcs_audit.assemble_audit_record_with_index("铁笼异常", "下载模型和算法文件异常: " + str(result), record_id,
                                                                       len(result)))
                    else:
                        audit_records.append(
                            fcs_audit.assemble_audit_record_with_index("铁笼异常", "下载模型和算法文件异常: " + except_str, record_id,
                                                                       len(except_str)))
                except Exception as e:
                    result = traceback.format_exc()
                    logger.exception("下载模型和算法文件异常")
                    except_str = str(e.__class__.__name__) + ": " + str(e)
                    if log_level == "DEBUG":
                        audit_records.append(
                            fcs_audit.assemble_audit_record_with_index("铁笼异常", "下载模型和算法文件异常: " + str(result), record_id,
                                                                       len(str(result))))
                    else:
                        audit_records.append(
                            fcs_audit.assemble_audit_record_with_index("铁笼异常", "下载模型和算法文件异常: " + except_str, record_id,
                                                                       len(except_str)))

        pl.close()
        pl.join()
        for res in res_l:
            logger.debug("res.get is %s", res.get())

        logger.debug('主线程运行时间: %s' % (time.time() - start_time))

        try:
            defaultHandler = "HandlerName"
            mod = __import__(defaultHandler)
            logger.debug("=======开始计算=======")
            result = mod.FunctionName(environ, start_response)
            if result:
                export_result = str(result[0], encoding="utf-8")
            audit_records.append(
                fcs_audit.assemble_audit_record_with_index("铁笼输出", str(export_result), record_id, len(str(result))))
        except Exception as e:
            trans_result = traceback.format_exc()
            logger.exception("计算异常")
            except_str = str(e.__class__.__name__) + ": " + str(e)
            if log_level == "DEBUG":
                audit_records.append(
                    fcs_audit.assemble_audit_record_with_index("铁笼输出", "计算异常: " + str(trans_result), record_id,
                                                               len(str(trans_result))))
            else:
                audit_records.append(
                    fcs_audit.assemble_audit_record_with_index("铁笼输出", "计算异常: " + except_str, record_id,
                                                               len(except_str)))
            responsebody = str(trans_result)
            start_response('200 OK', [('Content-Type', 'application/json')])
            return [bytes(responsebody, encoding="utf8")]
        finally:
            audit_records.append(fcs_audit.assemble_audit_record_with_index("铁笼销毁", "", record_id, 0))
            fcs_audit.bulk_record(audit_records)
            # Update the status to "destroyed"
            fcs_status.record_status(record_id)
        logger.debug("===audit_records is===%s", audit_records)
        return result
    except BaseException as e:
        # Every exception type previously listed here (AttributeError, ImportError,
        # KeyError, IOError, and so on) derives from BaseException and was handled
        # identically, so a single handler is sufficient.
        return logInfo(e, record_id, start_response)

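The computation step above amounts to importing a handler module by name at run time and calling it as a WSGI application. A minimal sketch of that pattern, assuming a hypothetical top-level module "my_handler" that exposes a WSGI-style application(environ, start_response) callable:

def dispatch(environ, start_response, handler_name="my_handler"):
    # Import the handler module by its top-level name at request time.
    mod = __import__(handler_name)
    # Delegate to the module's WSGI callable; it returns an iterable of bytes.
    return mod.application(environ, start_response)
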
def job():
    """
	Esegue la ricerca degli episodi mancanti, se li trova li scarica.
	"""
    logger.warning('\n' + txt.START_BLOCK_LOG.format(
        time=time.strftime('%d %b %Y %H:%M:%S')) + '\n')

    try:
        raw_series = sonarr.getMissingEpisodes()
        if len(raw_series) != 0:
            series = converting(raw_series)

            for anime in series:
                for season in anime["seasons"]:
                    logger.warning('\n' + txt.DIVIDER_LOG + '\n\n')
                    try:
                        logger.warning(
                            txt.ANIME_RESEARCH_LOG.format(
                                anime=anime["title"], season=season["num"]) +
                            '\n')

                        results = [aw.Anime(link=x) for x in season["links"]]

                        logger.info(
                            txt.EPISODE_RESEARCH_LOG.format(episode=", ".join(
                                [x["num"]
                                 for x in season["episodes"]])) + '\n')

                        episodi = fixEps([x.getEpisodes() for x in results])

                        for episode in season["episodes"]:
                            logger.info('\n' +
                                        txt.CHECK_EPISODE_AVAILABILITY_LOG.
                                        format(season=episode["season"],
                                               episode=episode["num"]) + '\n')
                            for ep in episodi:

                                # episode is available
                                if (str(ep.number) == str(episode["num"])
                                        and not anime["absolute"]
                                    ) or (str(ep.number) == str(episode["abs"])
                                          and anime["absolute"]):
                                    logger.info(txt.EPISODE_AVAILABLE_LOG +
                                                '\n')
                                    logger.warning(
                                        txt.EPISODE_DOWNLOAD_LOG.format(
                                            season=episode["season"],
                                            episode=episode["num"]) + '\n')

                                    title = f'{anime["title"]} - S{episode["season"]}E{episode["num"]}'

                                    file = ep.download(title, DOWNLOAD_FOLDER,
                                                       downloadProgress)
                                    if file:
                                        logger.info(
                                            txt.DOWNLOAD_COMPLETED_LOG + '\n')

                                        if SETTINGS["MoveEp"]:
                                            logger.info(
                                                txt.EPISODE_SHIFT_LOG.format(
                                                    season=episode["season"],
                                                    episode=episode["num"],
                                                    folder=anime["path"]) +
                                                '\n')
                                            if movefile(
                                                    os.path.join(
                                                        DOWNLOAD_FOLDER, file),
                                                    anime["path"]):
                                                logger.info(
                                                    txt.EPISODE_SHIFT_DONE_LOG
                                                    + '\n')

                                            logger.info(
                                                txt.ANIME_REFRESH_LOG.format(
                                                    anime=anime["title"]) +
                                                '\n')
                                            sonarr.rescanSerie(anime["ID"])

                                            if SETTINGS["RenameEp"]:
                                                logger.info(
                                                    txt.EPISODE_RENAME_LOG +
                                                    '\n')
                                                for i in range(
                                                        5):  # up to 5 attempts
                                                    try:
                                                        time.sleep(1)
                                                        epFileId = sonarr.getEpisodeFileID(
                                                            episode["ID"])
                                                    except KeyError:
                                                        continue
                                                    else:
                                                        sonarr.renameEpisode(
                                                            anime["ID"],
                                                            epFileId)
                                                        logger.info(
                                                            txt.
                                                            EPISODE_RENAME_DONE_LOG
                                                            + '\n')
                                                        break
                                                else:
                                                    logger.warning(
                                                        txt.
                                                        EPISODE_RENAME_ERROR_LOG
                                                        + '\n')

                                            if None not in (CHAT_ID,
                                                            BOT_TOKEN):
                                                logger.info(
                                                    txt.
                                                    SEND_TELEGRAM_MESSAGE_LOG +
                                                    '\n')
                                                telegram.warning(
                                                    txt.TELEGRAM_MESSAGE.
                                                    format(
                                                        title=anime["title"],
                                                        season=episode[
                                                            "season"],
                                                        episode=episode["num"],
                                                        episodeTitle=episode[
                                                            "title"]))

                                    break
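                            # for/else: the "else" below runs only if the inner loop
                            # finished without a break, i.e. no source had this episode.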
                            else:
                                logger.info(txt.EPISODE_UNAVAILABLE_LOG + '\n')

                    except requests.exceptions.RequestException as res_error:
                        logger.warning(
                            txt.CONNECTION_ERROR_LOG.format(
                                res_error=res_error) + '\n')
                    except aw.AnimeNotAvailable as info:
                        logger.warning(
                            txt.WARNING_STATE_LOG.format(warning=info) + '\n')
                    except aw.ServerNotSupported as warning:
                        logger.error(
                            txt.ERROR_STATE_LOG.format(error=warning) + '\n')
                    except aw.DeprecatedLibrary as dev:
                        logger.critical(
                            txt.CRITICAL_STATE_LOG.format(critical=dev) + '\n')
                    finally:
                        logger.warning('\n' + txt.DIVIDER_LOG + '\n\n')

        else:
            logger.info('\n' + txt.NO_EPISODES + '\n\n')

    except requests.exceptions.RequestException as res_error:
        logger.error(
            txt.CONNECTION_ERROR_LOG.format(res_error=res_error) + '\n')
    except Exception as error:
        logger.exception(
            txt.EXCEPTION_STATE_LOG.format(exception=error) + '\n')

    nextStart = time.strftime(
        "%d %b %Y %H:%M:%S",
        time.localtime(time.time() + SETTINGS["ScanDelay"] * 60))
    logger.warning('\n' + txt.END_BLOCK_LOG.format(time=nextStart) + '\n\n')
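The final log line derives the next start time from SETTINGS["ScanDelay"] (in minutes), which suggests job() is re-run periodically. A minimal, purely illustrative sketch of such a loop, using a hypothetical run_forever() driver since the actual scheduling code is not part of this snippet:

def run_forever():
    # Run one search/download pass, then sleep ScanDelay minutes before the next pass.
    while True:
        job()
        time.sleep(SETTINGS["ScanDelay"] * 60)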