import configparser

from Redis import Redis  # assumed project-local wrapper, imported as in Example #17


class UrlShortener:
    def __init__(self):
        self.redis = Redis()
        if not self.redis.does_key_exist('id'):
            self.redis.add_key_value('id', '1')
        config = configparser.ConfigParser()
        config.read('config.ini')
        self.short_url = config['config']['short_url']
        self.accepted_char = config['config']['accepted_char']

    def shorten_url(self, original_url):
        if self.redis.does_key_exist(original_url):
            url_id = int(self.redis.get_value_by_key(original_url))
            short_code = self.url_encoder(url_id)
        else:
            url_id = int(self.redis.get_value_by_key('id'))
            self.redis.add_key_value(original_url, url_id)
            short_code = self.url_encoder(url_id)
            url_id += 1
            self.redis.add_key_value('id', str(url_id))
        return self.short_url + short_code

    def url_encoder(self, url_id):
        """Encode a numeric id in base len(accepted_char)."""
        characters = self.accepted_char
        base = len(characters)
        encoded_url = []
        while url_id > 0:
            val = url_id % base
            encoded_url.append(characters[val])
            url_id = url_id // base
        return "".join(encoded_url[::-1])
Example #2
    def client_core_get(self,
                        addr="localhost",
                        port=6379,
                        unix_socket_path="{DIR_BASE}/var/redis.sock",
                        die=True):
        """

        :param addr:
        :param port:
        :param unix_socket_path:
        :return:
        """
        try:
            import redis
        except ImportError:
            if die:
                raise
            return

        unix_socket_path = self._tools.text_replace(unix_socket_path)
        self.unix_socket_path = unix_socket_path
        # cl = Redis(unix_socket_path=unix_socket_path, db=0)
        cl = Redis(Tools=self._tools, host=addr, port=port, db=0)
        try:
            r = cl.ping()
        except Exception as e:
            if isinstance(e, redis.exceptions.ConnectionError):
                if not die:
                    return
            raise

        assert r
        return cl
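A call site might use the die flag to probe for an optional Redis without crashing. A hypothetical usage sketch (factory stands in for whatever object exposes client_core_get):

# Hypothetical: `factory` is an instance of the class defining client_core_get.
client = factory.client_core_get(addr="localhost", port=6379, die=False)
if client is None:
    print("redis is not reachable; falling back to another cache")
else:
    client.ping()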
Example #7
def data():
    r = Redis()
    results = {}
    queue = request.args.get('queue') or "DemoQueue"

    redis_bucket = ""
    if queue == "DemoQueue":
        redis_bucket = "DEMO"
    if queue == "BabyNamesPrecache":
        redis_bucket = "BABYNAMESCACHE"

    rmq = RMQNegotiator(message_queue=queue)
    results["requeued"] = len(r.Connection.smembers('REQUEUE:' + redis_bucket))
    results["running"] = len(r.Connection.smembers('RUNNING:' + redis_bucket))

    last15_q = "SELECT Count(id) FROM failed WHERE timestamp > '%s'" % str(
        datetime.datetime.now() - datetime.timedelta(minutes=15))
    results["last_15m_fails"] = DemoHeartbeatDaemon().runQuery(last15_q)[0][0]

    try:
        results["failed_count"] = int(
            r.Connection.get("FAILED:" + redis_bucket +
                             ":COUNT").decode('utf8'))
    except (AttributeError, ValueError):  # key missing or not an integer
        results["failed_count"] = 0
    try:
        results["completed"] = int(
            r.Connection.get("COMPLETED:" + redis_bucket +
                             ":COUNT").decode('utf8'))
    except (AttributeError, ValueError):  # key missing or not an integer
        results["completed"] = 0

    results["queue_count"] = int(rmq.queue_count())

    results["queues"] = rmq.list_queues()
    return jsonify(results)
Example #8
class DemoHeartbeatDaemon:
    R = None
    PubSub = None
    Messenger = None

    def __init__(self):
        self.R = Redis()
        self.Messenger = RMQNegotiator(message_queue="DemoQueue")

    def runQuery(self, query="", connstring=CONFIG.DB):
        db = sqlite3.connect(connstring)
        cursor = db.cursor()
        cursor.execute(query)
        response = cursor.fetchall()
        db.commit()
        db.close()
        return response

    def log_failure(self, ident, starttime=None, endtime=None):
        sqlStr = """INSERT INTO failed (ident, timestamp) values (%d, '%s');""" % (
            int(ident), str(datetime.now()))
        self.runQuery(sqlStr)
        print(sqlStr)

    # RabbitMQ publish/requeue tasks for failed jobs are created here.
    def requeue_failed(self):
        failures = self.R.Connection.smembers(FAILED)
        for failure in failures:
            f = failure.decode('utf8')
            print(f)
            self.R.Connection.srem(FAILED, f)
            self.R.Connection.sadd(REQUEUE, int(f))
            self.log_failure(f)
            self.Messenger.publish_messages([{"ident": f}])

            if self.R.Connection.get("FAILED:DEMO:COUNT") is None:
                self.R.Connection.set("FAILED:DEMO:COUNT", 1)
            else:
                fc = int(
                    self.R.Connection.get("FAILED:DEMO:COUNT").decode('utf8'))
                self.R.Connection.set("FAILED:DEMO:COUNT", fc + 1)

    def hb_compare(self, last_updated):
        elapsed = datetime.now() - last_updated
        return elapsed > timedelta(seconds=30)

    def monitor(self):
        while True:
            running = self.R.Connection.smembers(RUNNING)
            print("Count of running tasks: " + str(len(running)))
            for task in running:
                if self.R.Connection.get(task) is None:
                    self.R.Connection.sadd(FAILED, task)
                    self.R.Connection.srem(RUNNING, task)

            self.requeue_failed()
            time.sleep(5)

    def rerun_failures(self):
        self.R.subscribe(REQUEUE)
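The contract between monitor() and the workers (see DemoQueueWorker.alive() in Example #11 below) is a 30-second setex heartbeat: as long as a worker keeps rewriting its key, get() succeeds; once the worker dies, the key expires and the daemon moves the task id from RUNNING to FAILED. A minimal sketch of that handshake using plain redis-py rather than the project's Redis wrapper (key names are hypothetical):

import json
import time
from datetime import datetime

import redis  # plain redis-py, standing in for the project's Redis wrapper

r = redis.Redis()

# Worker side: refresh a heartbeat key that expires after 30 seconds.
r.setex("42", 30, json.dumps({"last_update": str(datetime.now())}))

# Daemon side: if the worker stops refreshing, the key expires, and
# get() returning None is the signal to mark task 42 as failed.
time.sleep(31)
assert r.get("42") is None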
class MultiProcessCrawler(object):
    """
    Run the crawler in multiple processes.
    """
    def __init__(self):
        """
        Initialization.
        :return:
        """
        self.processNum = PRECESSNUM
        self.accountQueue = Queue()
        self.urlQueue = Queue()
        self.dataStorage = DataStorage()
        self.__dataStorage = self.dataStorage.dataStorage
        self.accountName = ACCOUNT.get('name')
        self.urlName = LINKEDINURL.get('name')
        self.usersName = LINKEDINUSERS.get('name')
        self.redis = Redis(self.__dataStorage)

    def __getAccountMany(self):
        """
        Fetch account names and passwords from the database (in bulk).
        :return: True if any usable account was queued, else False
        """
        # lastUpdateTime = int(time.time() - 24*60*60)
        # result = self.__name_pwd_collection.find({'user_state':'good','last_used_time':{'$lt':last_update_time}},{'_id':0,'email':1,'ld_pwd':1,'day_num':1})

        flag = True
        accountResult = self.__dataStorage.get(self.accountName).find(
            {'userState': 'normal'}, {
                '_id': 0,
                'email': 1,
                'password': 1
            })
        if accountResult.count() == 0:
            flag = False
        for account in accountResult:
            self.accountQueue.put(account)
        return flag

    def __getAccountRandomOne(self):

        # Known issue: if the program exits unexpectedly (manually or by accident),
        # an account can be left marked as 'using' even though nobody is using it.
        account = self.__dataStorage.get(self.accountName).find(
            {
                'userState': 'normal'
            }, {
                '_id': 0,
                'email': 1,
                'password': 1
            }).sort('lastUsedTime', 1).limit(1)
        account = account[0]
        self.__dataStorage.get(self.accountName).update(
            {'email': account['email']},
            {'$set': {
                'lastUsedTime': int(time.time())
            }})

        # if account:
        #     # mark the account as in use
        #     self.__dataStorage.get(self.accountName).update({'email': account['email']}, {'$set': {'usedState': 'using'}})
        # if no account was found, account is None
        return account

    # Reads directly from MongoDB, which is not ideal for distributed crawling.
    def __getUrl(self):
        """
        Fetch URLs that have not been visited yet.
        :return:
        """
        for url in self.__dataStorage.get(self.urlName).find({
                'isView': False
        }, {
                '_id': 0,
                'person_website': 1,
                'readNum': 1
        }).limit(URLLIMIT):
            self.urlQueue.put(url)

    # Reads from Redis; used for distributed crawling.
    def __redisGetUrl(self):
        """
        Fetch a URL that has not been visited yet.
        :return:
        """
        return self.redis.urlSpop()

    # Assumes a single account can scrape user profiles without limit.
    def RunCrawler(self):
        while True:
            # ---------- deprecated ----------
            # try:
            #     if self.accountQueue.qsize() == 0:
            #         if not self.__getAccountMany():  # no accounts left: wait 3 hours, then loop again
            #             print("sleep")
            #             time.sleep(3 * 60 * 60)  # rest for 3 hours
            #             continue
            # except:
            #     print("maybe mongodb or the network has a problem")
            #     time.sleep(10 * 60)
            # --------------------------------
            pool = Pool(self.processNum)
            for _ in range(self.processNum):
                pool.apply_async(self.__RunCrawler)
            pool.close()
            pool.join()

            # test:
            # self.__RunCrawler()

    def __RunCrawler(self):
        while True:
            # Fetching a batch of accounts at once (unused):
            # self.__getAccountMany()
            # if self.accountQueue.qsize() == 0:
            #     break
            # account = self.accountQueue.get(timeout=3)
            time.sleep(random.randint(1, 3))
            account = self.__getAccountRandomOne()
            if not account:
                print("no account available")
                time.sleep(10 * 60)
                continue

            try:
                crawler = LinkedinCrawler(self.__dataStorage)

                # log in with this account
                if not crawler.login(account['email'], account['password']):
                    crawler.closeWeb()
                    continue

                limitNum = 60
                while limitNum:
                    limitNum = limitNum - 1
                    # single-process path (assumes URLs never run out; no guard for an empty queue):
                    # if self.urlQueue.qsize() < 3:
                    #     self.__getUrl()
                    # LinkedinUrl = self.urlQueue.get()
                    # multi-process path:
                    LinkedinUrl = self.__redisGetUrl()
                    _LinkedinUrl = LinkedinUrl['person_website']
                    readNum = LinkedinUrl['readNum']
                    # scrape the user's profile
                    if crawler.processPage(_LinkedinUrl):
                        readNum += 1
                        # increment this page's read count
                        self.__dataStorage.get(self.urlName).update(
                            {'person_website': _LinkedinUrl},
                            {'$set': {
                                'readNum': readNum
                            }})
                        if readNum >= 2:
                            self.__dataStorage.get(self.urlName).update(
                                {'person_website': _LinkedinUrl},
                                {'$set': {
                                    'isView': True
                                }})
                crawler.logout()

            except Exception as e:
                print("renderERROR", e)
            finally:
                time.sleep(30)
                crawler.closeWeb()
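What makes __redisGetUrl safe to run across many processes (and machines) is Redis's SPOP: it atomically removes and returns a random set member, so no two workers can ever receive the same URL. A sketch of that pattern with plain redis-py, assuming urlSpop wraps something like this (the key name is hypothetical):

import redis

r = redis.Redis()

# Producer: seed the shared work set once.
r.sadd("linkedin:urls", "https://example.com/a", "https://example.com/b")

# Each worker pops atomically; duplicates are impossible by construction.
raw = r.spop("linkedin:urls")
if raw is not None:
    url = raw.decode("utf8")
    print("this worker exclusively owns", url)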
Example #11
class DemoQueueWorker:
    R = None
    total = 0
    counter = 0
    id = None

    def __init__(self, id=-1):
        self.id = id
        self.R = Redis().Connection

    def start(self):
        print('starting job id: ' + str(self.id))
        self.R.sadd(RUNNING, self.id)
        self.alive()
        return self.perform_task()

    def alive(self):
        Redis().get_or_set("STARTTIME:DEMO:" + str(self.id), datetime.now())
        self.R.setex(
            str(self.id),
            30,
            json.dumps({
                'id': self.id,
                'count': self.counter,
                'last_total': self.total,
                'last_update': str(datetime.now())
            }),
        )

    def perform_task(self):
        while self.total <= 21:
            if self.counter > 6:
                self.counter = 0
                print('Seven hits and still didn\'t bust for ID: ' +
                      str(self.id))
                time.sleep(60)
                return False
            add_me = randrange(1, 11)
            self.counter += 1
            time.sleep(randint(1, 10))
            self.total += add_me
            self.alive()

        self.cleanup()
        return True

    def cleanup(self):
        self.counter = 0
        # message = {'flash_color': 'green',
        #            'base_color': 'teal',
        #            'interval': 0.3,
        #            'count': 1
        #            }
        # Redis().publish("BlinkBlock", message)
        self.R.srem(RUNNING, self.id)
        self.R.sadd(COMPLETE, self.id)
        c = self.R.get("COMPLETED:DEMO:COUNT")
        if c is not None:
            self.R.set("COMPLETED:DEMO:COUNT", int(c) + 1)
        else:
            self.R.set("COMPLETED:DEMO:COUNT", 1)
        # smembers returns bytes, so compare against the encoded id
        if str(self.id).encode('utf8') in self.R.smembers(REQUEUE):
            self.R.srem(REQUEUE, self.id)
        self.R.set("ENDTIME:DEMO:" + str(self.id), datetime.now())
Example #13
def flushall():
    result = {"success": True}
    Redis().Connection.flushall()
    return jsonify(result)
    def main(self):
        # Connect to redis and get image to proceed
        postgres = PostgreSQL()
        agreement_id = postgres.select_agreement_id(self.image_key)
        product_title = postgres.select_product_title(str(agreement_id[0]))
        product_type_title = postgres.select_product_type_tytle(
            product_title[0])
        global match_count, shelf
        redis = Redis(self.image_key)
        redis.get_image()

        # TODO: change to image
        enter_image_path = "C:\\Users\\savch\\PycharmProjects\\template-matcher\\data\\image\\image.jpg"
        # enter_image_path = "C:\\Users\\savch\\PycharmProjects\\template-matcher\\data\\image\\0000.jpg"
        # template_image_folder = "C:\\Users\\savch\\PycharmProjects\\template-matcher\\data\\template\\alcohol"
        template_image_folder = "C:\\Users\\savch\\PycharmProjects\\template-matcher\\data\\template\\{}".format(
            product_type_title[0])
        print(template_image_folder)

        # template image
        templ = [
            os.path.join(template_image_folder, b)
            for b in os.listdir(template_image_folder)
            if os.path.isfile(os.path.join(template_image_folder, b))
        ]
        templ = [
            template for template in templ if template ==
            "C:\\Users\\savch\\PycharmProjects\\template-matcher\\data\\template\\{}\\{}.jpg"
            .format(product_type_title[0], product_title[0])
        ]
        # templ = [template for template in templ if
        #          template == "C:\\Users\\savch\\PycharmProjects\\template-matcher\\data\\template\\{}\\{}.jpg".format(
        #              "alcohol", "elab")]

        # shelf count
        # shelf_count = 2
        crop_image_list = self.crop_image(enter_image_path, self.shelf_count)

        res_list = []
        match_count = 0
        for t in templ:

            img_tpl = cv2.imread(t, cv2.IMREAD_GRAYSCALE)
            m_count = 0
            for img in crop_image_list:
                shelf = int(img[len(img) - 5:len(img) - 4])
                img_gray = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
                find_temp_res = self.find_templ(img_gray, img_tpl)

                coord = find_temp_res[0]
                image_to_draw = find_temp_res[1]

                # Match count on the shelf
                tmp = 0
                for x in coord:
                    if x is not None:
                        tmp += len(x)
                print("tmp: ", tmp)
                m_count += tmp

                if image_to_draw is None:
                    img_res = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)
                else:
                    img_res = cv2.cvtColor(image_to_draw, cv2.COLOR_GRAY2BGR)
                img_res = self.draw_frames(img_res, coord)
                tn = os.path.splitext(os.path.basename(img))[0]
                if len(coord) != 0:
                    cv2.imwrite(
                        "C:\\Users\\savch\\PycharmProjects\\template-matcher\\data\\result\\res_{}.jpg"
                        .format(randint(0, 1000)), img_res)
                # for c in coord:
                #     print(c)

                # TODO: coord count
                match_count += m_count
                if match_count != 0:
                    res_list.append(("res_{}_{}.jpg".format(tn, match_count),
                                     shelf, match_count))
                # if match_count != 0:
                #     break
        print(match_count)

        estimate(self.image_key, match_count, shelf)
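The internals of find_templ are not shown, but the surrounding code implies classic OpenCV template matching over grayscale images, returning candidate coordinates plus an image to draw on. A standalone sketch under that assumption (the 0.8 threshold and the exact return shape are guesses made to fit the call sites above):

import cv2
import numpy as np


def find_templ(img_gray, img_tpl, threshold=0.8):
    """Return (coordinates, image_to_draw) for matches of img_tpl in img_gray."""
    # Normalized cross-correlation: scores near 1.0 are strong matches.
    res = cv2.matchTemplate(img_gray, img_tpl, cv2.TM_CCOEFF_NORMED)
    ys, xs = np.where(res >= threshold)
    coords = [list(zip(xs, ys))] if len(xs) else []  # top-left corners per group
    return coords, img_gray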
Example #15
class BabyNamesWorker:
    total = 0
    counter = 0

    def __init__(self, year, gender, locale):
        self.R = Redis().Connection
        self.year = year
        self.gender = gender
        self.locale = locale
        self.json_obj = json.dumps({
            "locale": self.locale,
            "year": self.year,
            "gender": self.gender
        })

    def start(self):
        print('starting job: ' + self.json_obj)
        self.R.sadd(RUNNING, self.json_obj)
        self.alive()
        return self.perform_task()

    def alive(self):
        self.R.setex(
            self.json_obj,
            30,
            json.dumps(
                {
                    'count': self.counter,
                    'last_total': self.total,
                    'last_update': str(datetime.now())
                }
            ),
        )

    def perform_task(self):
        self.alive()
        babynames_task.prepopulate_cache(self.year, self.gender, self.locale)
        self.cleanup()
        return True

    def cleanup(self):
        self.counter = 0
        """message = {'flash_color': 'yellow',
                                           'base_color': 'purple',
                                           'interval': 0.05,
                                           'count': 1
                                           }
        Redis().publish("BlinkBlock", message)"""
        self.R.srem(RUNNING, self.json_obj)
        self.R.srem(REQUEUE, self.json_obj)
        self.R.sadd(COMPLETE, self.json_obj)
        c = self.R.get("COMPLETED:BABYNAMESCACHE:COUNT")
        if c is not None:
            self.R.set("COMPLETED:BABYNAMESCACHE:COUNT", int(c) + 1)
        else:
            self.R.set("COMPLETED:BABYNAMESCACHE:COUNT", 1)
        if self.json_obj.encode('utf8') in self.R.smembers(REQUEUE):  # smembers returns bytes
            # fc = int(self.R.Connection.get("failed_count").decode('utf8'))
            # self.R.Connection.set("failed_count", fc - 1)
            self.R.srem(REQUEUE, self.json_obj)
Example #17
import sqlite3
from Redis import Redis
import CONFIG
from datetime import datetime
import rmq_negotiator

cachemanager = Redis()
USE_SQLITE = True

if USE_SQLITE:
    DBPATH = CONFIG.DBPATH
else:
    import pymysql as MySQLdb

db_user = ''
db_pass = ''
db_server = ''
db_name = ''
db_conn = db_server, db_user, db_pass, db_name,


def runQuery(query="", connstring=db_conn):
    if not USE_SQLITE:
        db = MySQLdb.connect(
            host=connstring[0],
            user=connstring[1],
            passwd=connstring[2],
            db=connstring[3])
    else:
        db = sqlite3.connect(DBPATH)
    cursor = db.cursor()
    cursor.execute(query)
    response = cursor.fetchall()
    db.commit()
    db.close()
    return response
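Both runQuery helpers on this page interpolate values straight into the SQL text (see log_failure in Example #8), which works for trusted inputs but is fragile. If the failed table is as those calls suggest, sqlite3's parameter binding is a safer equivalent; a small sketch (path and table layout assumed from the INSERT in Example #8):

import sqlite3
from datetime import datetime

db = sqlite3.connect("demo.db")  # hypothetical path standing in for CONFIG.DBPATH
db.execute("CREATE TABLE IF NOT EXISTS failed (ident INTEGER, timestamp TEXT)")
# '?' placeholders let sqlite3 escape values instead of "%d ... '%s'" formatting.
db.execute("INSERT INTO failed (ident, timestamp) VALUES (?, ?)",
           (42, str(datetime.now())))
db.commit()
db.close()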
Example #18
class JobsRuntimeKPIRoller:
    rares = []
    long_running = []

    def __init__(self, bucket):
        self.R = Redis().Connection
        self.bucket = bucket

    def __run_secs(self, id):
        start = "STARTTIME:" + str(self.bucket) + ":" + str(id)
        end = "ENDTIME:" + str(self.bucket) + ":" + str(id)
        rs = self.R.get(start)
        re = self.R.get(end)
        if rs is None or re is None:
            return 0  # missing timestamp: never hand None to parser.parse()
        rs = rs.decode('utf8')
        re = re.decode('utf8')
        self.timestamps[id] = {"startTime": rs, "endTime": re}
        s = parser.parse(rs)
        e = parser.parse(re)
        return (e - s).seconds

    def __find_outliers(self):
        for id in self.ids:
            seconds = self.id_time_dict[id]
            if seconds > (self.mean + (self.std * 2)):
                self.rares.append({
                    "id": id,
                    "seconds": seconds,
                    # "startTime": self.timestamps[id]["startTime"],
                    # "endTime": self.timestamps[id]["endTime"]
                })

    def export_runtime_kpi(self):
        self.rares = []
        self.timestamps = {}
        ids = []
        times = []

        for v in self.R.smembers("COMPLETE:" + self.bucket):
            times.append(self.__run_secs(v.decode('utf8')))
            ids.append(int(v.decode('utf8')))

        self.id_time_dict = dict(zip(ids, times))
        self.ids = ids
        self.times = times
        self.total = len(times)
        self.max_run = max(times)
        self.mean = mean(times)
        self.median = median(times)
        self.median_grouped = median_grouped(times)
        self.std = pstdev(times)
        self.totalSeconds = sum(times)
        self.__find_outliers()

    def print_results(self):
        print("Type: " + self.bucket)
        print('Max: ' + str(self.max_run))
        print('Total: ' + str(self.total))
        print('Mean: ' + str(self.mean))
        print('Median: ' + str(self.median))
        print('Median grouped: ' + str(self.median_grouped))
        print('Standard Deviation: ' + str(self.std))
        # print(self.id_time_dict)
        print('Rares: ' + str(len(self.rares)))
        print('Rare %: ' + str((len(self.rares) / self.total) * 100))

    def results(self):
        return {
            "total": self.total,
            "max": self.max_run,
            "mean": self.mean,
            "median": self.median,
            "media_grouped": self.median_grouped,
            "standardDeviation": self.std,
            "rareCount": len(self.rares),
            "rarePercent": (len(self.rares) / self.total) * 100,
            "rareTimeById": self.rares,
            "totalSeconds": self.totalSeconds
        }
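
A driver for the roller might look like the sketch below, assuming the workers above have already populated the COMPLETE:DEMO set and the STARTTIME/ENDTIME keys:

# Hypothetical driver: compute and report runtime KPIs for the DEMO bucket.
kpi = JobsRuntimeKPIRoller("DEMO")
kpi.export_runtime_kpi()  # reads timestamps from Redis, computes the stats
kpi.print_results()       # human-readable summary
stats = kpi.results()     # same numbers as a dict, e.g. for jsonify()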