Exemple #1
0
 def import_db(cls, db_config):
     """Copy the MySQL ``phone`` table into the Redis hash ``phone``.

     Every row becomes one hash field: the field name is the ``phone``
     column and the field value is ``province``, ``city`` and ``isp`` joined
     with ``|`` (done in SQL by ``CONCAT_WS``). The commands are queued on a
     non-transactional pipeline and flushed with a single ``execute()`` after
     the ``db_session()`` block has exited.

     :param db_config: keyword arguments forwarded to ``DBPool``.
     """
     pool = DBPool(**db_config)
     query = "SELECT phone, CONCAT_WS('|', province, city, isp) `value` FROM `phone`"
     with cls.rds_cli.pipeline(transaction=False) as pipe:
         with pool.db_session() as session:
             session.execute(query)
             # Pull rows one at a time; a falsy result marks the end of the set.
             row = session.fetchone()
             while row:
                 pipe.hset("phone", row["phone"], row["value"])
                 row = session.fetchone()
         pipe.execute()
Exemple #2
0
 def import_db(cls, db_config):
     """Copy the MySQL ``ip`` table into the Redis sorted set ``ip``.

     Each row is added as one member whose score is ``ip_start_num`` and
     whose value is ``ip_start_num``, ``country``, ``province``, ``city``,
     ``district`` and ``isp`` joined with ``|`` (done in SQL by
     ``CONCAT_WS``). The commands are queued on a non-transactional pipeline
     and flushed with a single ``execute()`` after the ``db_session()`` block
     has exited.

     :param db_config: keyword arguments forwarded to ``DBPool``.
     """
     pool = DBPool(**db_config)
     query = (
         "SELECT ip_start_num, CONCAT_WS('|',ip_start_num, country, province, city, district, isp) `key` "
         "FROM ip ORDER BY ip_start_num"
     )
     with cls.rds_cli.pipeline(transaction=False) as pipe:
         with pool.db_session() as session:
             session.execute(query)
             # Pull rows one at a time; a falsy result marks the end of the set.
             row = session.fetchone()
             while row:
                 pipe.zadd("ip", {row["key"]: row["ip_start_num"]})
                 row = session.fetchone()
         pipe.execute()
Exemple #3
0
class PhoneRegionMem(object):
    """search region from RAM, 450155 records will cost about 372 MB

    Singleton that loads the whole ``phone`` table into a dict keyed by the
    ``phone`` column and answers lookups from memory.
    """
    _instance = None
    # Flipped to True (on the instance) once the table has been loaded, so
    # that repeated ``PhoneRegionMem()`` calls do not reload it.
    _loaded = False
    db = DBPool(**DATABASE_CONFIG["public"])

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Load the table on first construction only.

        Python calls ``__init__`` every time ``PhoneRegionMem()`` is
        evaluated, even though ``__new__`` hands back the same object.
        Without this guard each call would discard the cache and re-read
        every row from the database.
        """
        if self._loaded:
            return
        self.phone_regions = {}
        self._load_db()
        # Set only after a successful load so a failed load can be retried.
        self._loaded = True

    def _load_db(self):
        """Read every row of ``phone`` into ``self.phone_regions``.

        Existing keys are overwritten; keys not present in the result set
        are left untouched.
        """
        with self.db.db_session() as session:
            session.execute("SELECT phone, province, city, isp FROM `phone`")
            for res in session.fetchall():
                self.phone_regions[res["phone"]] = {
                    "province": res["province"],
                    "city": res["city"],
                    "isp": res["isp"],
                }

    def lookup(self, phone):
        """Return the region of ``phone``.

        :param phone: phone number string; it must match ``regex_phone``.
        :return: dict with ``province``, ``city`` and ``isp`` keys, or
            ``None`` when the number does not match ``regex_phone`` or its
            prefix is not in the table.
        """
        if not regex_phone.match(phone):
            return None
        # The lookup key is the first 7 of the last 11 characters.
        return self.phone_regions.get(phone[-11:-4], None)
Exemple #4
0
class PhoneRegion(object):
    """search region from mysql

    Singleton whose ``lookup`` classmethod runs one query per call against
    the ``phone`` table.
    """
    _instance = None
    db = DBPool(**DATABASE_CONFIG["public"])

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        existing = cls._instance
        if existing is not None:
            return existing
        cls._instance = super().__new__(cls)
        return cls._instance

    @classmethod
    def lookup(cls, phone):
        """Return the region row for ``phone``.

        :param phone: phone number string; it must match ``regex_phone``.
        :return: whatever ``session.fetchone()`` yields for the
            ``province, city, isp`` query, or ``None`` when ``phone`` does
            not match ``regex_phone``.
        """
        if not regex_phone.match(phone):
            return None
        # NOTE(review): the prefix here is LEFT(<input>, 7) computed in SQL,
        # while PhoneRegionMem.lookup slices phone[-11:-4]; confirm both
        # agree for every string regex_phone accepts.
        sql = "SELECT province, city, isp FROM phone WHERE phone=LEFT( %(phone)s, 7)"
        params = {"phone": phone}
        with cls.db.db_session() as session:
            session.execute(sql, params)
            return session.fetchone()
Exemple #5
0
class IPRegionMem(object):
    """search region from ram, 648831 records will cost about 458 MB

    Singleton that loads the whole ``ip`` table, ordered by
    ``ip_start_num``, into a list and answers lookups with a binary search.
    """
    _instance = None
    # Flipped to True (on the instance) once the table has been loaded, so
    # that repeated ``IPRegionMem()`` calls do not reload it.
    _loaded = False
    db = DBPool(**DATABASE_CONFIG["public"])

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Load the table on first construction only.

        Python calls ``__init__`` every time ``IPRegionMem()`` is evaluated,
        even though ``__new__`` hands back the same object. Without this
        guard each call would discard the cache and re-read every row from
        the database.
        """
        if self._loaded:
            return
        self.ip_regions = {}
        self.max_idx = 0
        self._load_db()
        # Set only after a successful load so a failed load can be retried.
        self._loaded = True

    def _load_db(self):
        """Fetch all rows sorted by ``ip_start_num`` and record the last index."""
        with self.db.db_session() as session:
            session.execute(
                "SELECT ip_start_num, country, province, city, district, isp FROM ip ORDER BY ip_start_num"
            )
            self.ip_regions = session.fetchall()
            self.max_idx = len(self.ip_regions) - 1

    @staticmethod
    def ip_to_long(ip):
        """Convert a dotted-quad IPv4 string to its unsigned 32-bit integer."""
        _ip = socket.inet_aton(ip)
        return struct.unpack("!L", _ip)[0]

    def lookup(self, ip):
        """Return the region whose range contains ``ip``.

        The matching row is the one with the greatest ``ip_start_num`` that
        is less than or equal to the numeric value of ``ip``.

        :param ip: IPv4 address string; it must match ``regex_ipv4_address``.
        :return: a copy of the matching row without its ``ip_start_num``
            key, or ``None`` when ``ip`` does not match the regex, the table
            is empty, or ``ip`` is below the first range start.
        """
        if not regex_ipv4_address.match(ip):
            return None
        ip_num = self.ip_to_long(ip)
        left = 0
        right = self.max_idx
        # Index of the best candidate seen so far; -1 means "none yet".
        # Tracking it explicitly keeps an exact hit on ``mid`` from being
        # lost, which happened when the result was derived from left/right.
        found = -1
        while left <= right:
            mid = left + (right - left) // 2
            if self.ip_regions[mid]["ip_start_num"] <= ip_num:
                found = mid
                left = mid + 1
            else:
                right = mid - 1
        if found < 0:
            # Empty table or address below every range start; a negative
            # index would otherwise wrap around to the last row.
            return None
        r = self.ip_regions[found].copy()
        r.pop("ip_start_num")
        return r
Exemple #6
0
class IPRegion(object):
    """search region from mysql

    Singleton whose ``lookup`` classmethod runs one query per call against
    the ``ip`` table.
    """
    _instance = None
    db = DBPool(**DATABASE_CONFIG["public"])

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        existing = cls._instance
        if existing is not None:
            return existing
        cls._instance = super().__new__(cls)
        return cls._instance

    @classmethod
    def lookup(cls, ip):
        """Return the region row whose range contains ``ip``.

        :param ip: IPv4 address string; it must match ``regex_ipv4_address``.
        :return: whatever ``session.fetchone()`` yields for the
            ``country, province, city, district, isp`` query, or ``None``
            when ``ip`` does not match ``regex_ipv4_address``.
        """
        if not regex_ipv4_address.match(ip):
            return None
        sql = (
            "SELECT country, province, city, district, isp FROM ip "
            "WHERE INET_ATON( %(ip)s ) BETWEEN ip_start_num AND ip_end_num LIMIT 1"
        )
        params = {"ip": ip}
        with cls.db.db_session() as session:
            session.execute(sql, params)
            return session.fetchone()
Exemple #7
0
                  help="to window or not to window")
# -n / --max_items: integer cap on the number of items to scrape; falls back
# to constants.MAX_ITEMS when the flag is not given.
parser.add_option("-n",
                  "--max_items",
                  type=int,
                  dest="max_items",
                  default=constants.MAX_ITEMS,
                  help="max number of items to scrape")
(options, args) = parser.parse_args()

# start searching
# `start` keeps the overall start time; `ti` is the start of the current
# phase and is advanced after each phase is reported.
start = time()
ti = start
search_term = options.search_term

# get items
# Connection pool handed to the Researcher below.
db_pool = DBPool(db_constants.DBCONFIG, pool_size=20)
# db_pool = mysql.connector.pooling.MySQLConnectionPool(pool_name="conn_pool", pool_size=constants.MAX_THREADS, **db_constants.DBCONFIG)

# The log file name is constants.LOGFILE formatted with today's date as
# MM-DD-YY; it is opened in append mode and closed when the block exits.
with open(constants.LOGFILE % date.today().strftime("%m-%d-%y"),
          'a') as logfile:
    # NOTE: this assignment rebinds the name `researcher` to the new
    # Researcher instance, so `researcher.Researcher` is no longer reachable
    # through that name afterwards.
    researcher = researcher.Researcher(constants.BASESEARCHURL,
                                       options.max_items, search_term, db_pool,
                                       logfile)
    queue = researcher.scrape_all_pages()
    # Presumably blocks until all queued scrape work is done - confirm
    # against the type returned by scrape_all_pages().
    queue.join()

# Report the duration of the retrieval phase and restart the phase timer.
tf = time()
print("Retrieval took {0:.2f}s".format(tf - ti))
ti = tf

# analyze and report on items