Beispiel #1
0
    def __init__(self):
        """Create the helpers, then probe every queued row of the `guess` table.

        Rows are read in pages of 100 (ordered by id) and each
        (pagedate, pagenum) pair is handed to `self.url_page`.  All of the
        work happens at construction time.
        """
        self.wget = wget()
        self.db = MysqlConn()

        # NOTE: the `guess` table is expected to be filled already.  It was
        # historically populated by a one-off scan that, for every url_num
        # missing from `pages`, inserted that number together with the
        # url_date of the closest smaller url_num.

        # 31541 is kept as the hard upper bound on the number of pages read.
        for page_index in range(31541):
            sql = 'select pagedate,pagenum from guess where pagenum<3040909 order by id asc limit %s,100' % (
                100 * page_index)
            rows = self.db.selectall(sql)
            if not rows:
                # Offsets only grow, so an empty page means nothing is left;
                # stop instead of issuing the remaining queries.
                break
            for pagedate, pagenum in rows:
                self.url_page(pagedate, pagenum)
                # Single pre-formatted argument: same output on Python 2 and 3.
                print('%s %s' % (pagedate, pagenum))
Beispiel #2
0
class gm_clients():
    """Queue every row of the `guess` table as a background gearman job."""

    def __init__(self):
        """Connect the gearman clients (localhost:4730) and the database."""
        self.client = gearman.GearmanClient(['localhost:4730'])
        self.admin = gearman.GearmanAdminClient(['localhost:4730'])
        # Every job request returned by submit_job is kept here; nothing in
        # this class removes entries again.
        self.job_list = []
        self.db = MysqlConn()

    def submit_jobs(self, data_sting):
        """Submit `data_sting` to the "reverse" task as a background job.

        The returned job request is appended to `self.job_list`.
        """
        job_request = self.client.submit_job("reverse",
                                             data_sting,
                                             background=True)
        self.job_list.append(job_request)

    def control(self):
        """Read `guess` in pages of 100 rows and submit one job per row.

        Each row is serialised as its columns joined by '-'
        (i.e. "<pagedate>-<pagenum>").
        """
        # 31541 is kept as the hard upper bound on the number of pages read.
        for page_index in range(31541):
            sql = 'select pagedate,pagenum from guess where pagenum<2938810 order by id asc limit %s,100' % (
                100 * page_index)
            rows = self.db.selectall(sql)
            if not rows:
                # Offsets only grow, so an empty page means the table is
                # exhausted; skip the remaining (empty) queries.
                break
            for row in rows:
                self.submit_jobs('-'.join(str(e) for e in row))
Beispiel #3
0
    def __init__(self, thread_url, title, forum_type, fid):
        '''
        Set up the helpers and download the thread page at `thread_url`.

        Construction stops early (only the helper attributes are set) when
        `title` is empty, when `thread_url` is not a t66y.com `htm_data`
        thread URL, or when the download raises.

        parameter: thread_url -- full URL of the thread page
        parameter: title      -- thread title; an empty title discards the thread
        parameter: forum_type, fid -- not used in the part of the constructor
                                      shown here
        '''
        self.db = MysqlConn()
        self.wget = wget()
        self.utils = utils('ads')
        url = thread_url
        print(url)
        # Empty title: discard the thread.
        if not title:
            return

        # Dots are escaped so that only the literal host name and the
        # ".html" suffix match.
        pattern = re.compile(
            r'https://t66y\.com/htm_data/(\d+)/(\d+)/(\d+)\.html')
        match = pattern.match(url)
        if match:
            url_id, url_date, url_num = match.groups()
        else:
            return
        # Disabled check: if the thread already exists, return right away.
        # if self.db.thread_exist(fid=fid,url=url):
        # 	return True

        # Fetching the page failed (per the original note: the returned
        # status was not 200).
        try:
            content = self.wget.get_content(url)
        except Exception as e:
            # Concatenation reproduces the Python 2 print-statement output
            # (no space is inserted after a trailing tab).
            print('str(e):\t\t' + str(e))
            return
Beispiel #4
0
class Guess(object):
    """Gearman task handler that probes guessed thread URLs on t66y.com."""

    # Forum ids that appear as the first path component under htm_data/,
    # in the order they are probed.
    _FORUM_IDS = (2, 4, 5, 7, 8, 15, 16, 20, 21, 22, 23, 25, 26, 27, 10)

    def __init__(self):
        """Create the `wget` helper and the database handle."""
        self.wget = wget()
        self.db = MysqlConn()

    def url_page(self, gearman_worker, gearman_job):
        """Probe the candidate URLs for one "<pagedate>-<pagenum>" job.

        `gearman_job.data` is split on '-' into two integers.  The date
        itself, the date minus one and the date plus one are tried in that
        order against every forum id; the first URL whose content is not
        the string '404' is stored via `url_success` and ends the search.

        `gearman_worker` is not used.  Always returns the string 'True'.
        """
        data_list = gearman_job.data.split('-')
        pagedate = int(data_list[0])
        pagenum = int(data_list[1])
        # Single pre-formatted argument: same output on Python 2 and 3.
        print('%s %s' % (pagedate, pagenum))

        for new_date in (pagedate, pagedate - 1, pagedate + 1):
            hit = False
            for forum_id in self._FORUM_IDS:
                url = 'https://t66y.com/htm_data/%s/%s/%s.html' % (
                    forum_id, new_date, pagenum)
                try:
                    if '404' != self.wget.get_content(url):
                        hit = True
                        self.url_success(url)
                        break
                except Exception:
                    # Best effort: a failed fetch or insert moves on to the
                    # next candidate.  Unlike a bare `except:`, this lets
                    # KeyboardInterrupt / SystemExit stop the worker.
                    continue
            if hit:
                break
        return 'True'

    def url_success(self, url):
        """Insert `url` into the `url` table."""
        url_sql = "insert into url(url) value('%s')" % (url)
        self.db.query(url_sql)
Beispiel #5
0
class Publish(object):
    """Register Discuz accounts and publish stored image threads with them."""

    def __init__(self):
        """Read the target domain and open the database handle."""
        self.domain = discuz_var['domain']
        self.db = MysqlConn()
        # Threads are counted per forum; the account is switched whenever
        # the thread index is a multiple of this value.
        self.count_peruser = 3
        self.user_login = False
        # Handler of the currently logged-in account.  It lives on the
        # instance (next to `user_login`) so the login state survives
        # across calls.
        self._handler = None

    def register(self):
        """Register every `user_dis` row with status 0 on the Discuz site.

        Rows whose registration succeeds are set to status 1 and the fresh
        session is logged out again.  Always returns True.
        """
        fields = discuz_var['fields']
        str_sql = "select username,password,email,id from user_dis where status = %s" % (0,)
        for username, passwd, email, user_id in (self.db.selectall(str_sql) or ()):
            handler = DiscuzAPI(self.domain)
            if handler.register(username, passwd, email, fields):
                str_sql = "update user_dis set status=1 where id =%s"
                self.db.query(str_sql, (user_id,))
                handler.logout()
        return True

    def random_publis(self):
        """Publish pending threads of every status-0 forum of type 'img'."""
        str_sql = "select id,dis_fid,content_type from forum where status =%s" % (0,)
        for forum_id, dis_fid, text_type in (self.db.selectall(str_sql) or ()):
            if text_type == 'img':
                self.__publis_imgs(forum_id, dis_fid)

    def __publis_imgs(self, forum_id, dis_fid):
        """Publish each status-0 thread of `forum_id` to Discuz forum `dis_fid`.

        The post body is one "[img]url[/img]" line per attachment; threads
        without attachments are skipped.  Published threads get status 1.
        """
        str_sql = "select id,title from thread where fid=%s and status=%s" % (forum_id, 0)
        threads_tuple = self.db.selectall(str_sql) or ()
        for num, (tid, title) in enumerate(threads_tuple):
            title = title[:40]

            str_sql = "select url from attach where tid =%s" % (tid,)
            urls_tuple = self.db.selectall(str_sql)
            if not urls_tuple:
                continue
            imgs_content = u''.join(
                u"[img]" + row[0] + u"[/img]\n" for row in urls_tuple)

            # Rotate accounts.  The handler is read from the instance: a
            # local variable would be unbound when a previous call left
            # `user_login` set.
            if self.user_login and not num % self.count_peruser:
                self._handler.logout()
                self._handler = None
                self.user_login = False

            if not self.user_login:
                self._handler = self.choose_user()
            self._handler.publish(dis_fid, subject=title, msg=imgs_content)
            str_sql = 'update thread set status=%s where id = %s' % (1, tid)
            self.db.query(str_sql)

    def choose_user(self):
        """Log in as a randomly chosen registered account (status 1).

        Returns the logged-in DiscuzAPI handler and records the login on
        the instance.  Raises ValueError when no such account exists.
        """
        str_sql = "select count(*) from user_dis where status=1"
        count = self.db.selectone(str_sql)[0]
        if not count:
            raise ValueError("no user_dis account with status=1 available")

        # Pick by offset among the status-1 rows: ids need not be
        # contiguous and an arbitrary id may belong to an unregistered row.
        offset = randint(0, count - 1)
        str_sql = "select username,password from user_dis where status=1 order by id limit %s,1" % (offset,)
        user_tuple = self.db.selectone(str_sql)

        handler = DiscuzAPI(self.domain)
        handler.login(user_tuple[0], user_tuple[1])
        self.user_login = True
        self._handler = handler
        return handler
			# if not urls_tuple:
			# 	return False



# users_tuple = db.selectall(str_sql)

# #handler = DiscuzAPI(domain)
# email = set()
# for num in range(len(users_tuple)):
# 	user_info = users_tuple[num]
# 	if user_info[2] in email:
# 		str_sql = "delete from user_dis where id = %s" % (user_info[3])
# 		db.query(str_sql)
# 	else:
# 		email.add(user_info[2])



# 	handler = DiscuzAPI(domain)
# 	user_info = users_tuple[num]
# 	print user_info[0]
# 	if not handler.login(user_info[0],user_info[1]):
# 	    false_num +=1
# 	    if false_num ==5:
# 	        break
# 	    str_sql = "update user_dis set status =0 where id = %s" % (user_info[3])
# 	    db.query(str_sql)
# 	handler.logout()
# 	del(handler)
Beispiel #6
0
	def __init__(self):
		"""Read the target domain from `discuz_var` and open a `MysqlConn`."""
		self.domain = discuz_var['domain']
		self.db = MysqlConn()
		# NOTE(review): presumably the number of posts made per account
		# before switching users -- confirm against the rest of the class.
		self.count_peruser = 3
		# Starts out False; presumably tracks whether an account is
		# currently logged in -- confirm against the rest of the class.
		self.user_login =False
Beispiel #7
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from discuz.discuz import DiscuzAPI
from libs.mysqlconn import MysqlConn

db = MysqlConn()

# NOTE(review): the site URL and the admin credentials are hard-coded here.
handler = DiscuzAPI('http://umt365.com')
handler.login('admin', '123Grey')

# Each entry pairs the query selecting the pages of one source forum with
# the forum id that is passed to handler.publish for those pages.
publish_plan = (
    ('select title,content from pages where fid=15 order by id asc', 42),
    ('select title,content from pages where fid=4 order by id asc', 43),
    ('select title,content from pages where fid=5 order by id asc', 44),
)

for sql, target_forum in publish_plan:
    results = db.selectall(sql)
    for subject, msg in results:
        handler.publish(target_forum, subject, msg)

sql = 'select title,content from pages where fid=25 order by id asc'
results = db.selectall(sql)
for num in range(len(results)):
Beispiel #8
0
 def __init__(self):
     """Create the `wget` helper and the `MysqlConn` handle kept on the instance."""
     self.wget = wget()
     self.db = MysqlConn()
Beispiel #9
0
class Guess(object):
    """Probe guessed thread URLs on t66y.com for every row of `guess`."""

    # Forum ids that appear as the first path component under htm_data/,
    # in the order they are probed.
    _FORUM_IDS = (2, 4, 5, 7, 8, 15, 16, 20, 21, 22, 23, 25, 26, 27, 10)

    def __init__(self):
        """Create the helpers, then process the whole `guess` table.

        Rows are read in pages of 100 (ordered by id) and each
        (pagedate, pagenum) pair is handed to `url_page`.  All of the work
        happens at construction time.
        """
        self.wget = wget()
        self.db = MysqlConn()

        # NOTE: the `guess` table is expected to be filled already.  It was
        # historically populated by a one-off scan that, for every url_num
        # missing from `pages`, inserted that number together with the
        # url_date of the closest smaller url_num.

        # 31541 is kept as the hard upper bound on the number of pages read.
        for page_index in range(31541):
            sql = 'select pagedate,pagenum from guess where pagenum<3040909 order by id asc limit %s,100' % (
                100 * page_index)
            rows = self.db.selectall(sql)
            if not rows:
                # Offsets only grow, so an empty page means nothing is left;
                # stop instead of issuing the remaining queries.
                break
            for pagedate, pagenum in rows:
                self.url_page(pagedate, pagenum)
                # Single pre-formatted argument: same output on Python 2 and 3.
                print('%s %s' % (pagedate, pagenum))

    def url_page(self, pagedate, pagenum):
        """Try every forum id for `pagenum` on `pagedate` and its neighbours.

        The date itself, the date minus one and the date plus one are tried
        in that order; the first URL whose content is not the string '404'
        is stored via `url_success` and ends the search.
        """
        for new_date in (pagedate, pagedate - 1, pagedate + 1):
            hit = False
            for forum_id in self._FORUM_IDS:
                url = 'https://t66y.com/htm_data/%s/%s/%s.html' % (
                    forum_id, new_date, pagenum)
                try:
                    if '404' != self.wget.get_content(url):
                        hit = True
                        self.url_success(url)
                        break
                except Exception:
                    # Best effort: a failed fetch or insert moves on to the
                    # next candidate.  Unlike a bare `except:`, this lets
                    # KeyboardInterrupt / SystemExit stop the run.
                    continue
            if hit:
                break

    def url_success(self, url):
        """Insert `url` into the `url` table."""
        url_sql = "insert into url(url) value('%s')" % (url)
        self.db.query(url_sql)
Beispiel #10
0
 def __init__(self):
     """Create the gearman clients, an empty job list and a `MysqlConn`."""
     # Both gearman clients are given the same address, localhost:4730.
     self.client = gearman.GearmanClient(['localhost:4730'])
     self.admin = gearman.GearmanAdminClient(['localhost:4730'])
     # Starts empty; presumably collects submitted job requests -- confirm
     # against the rest of the class.
     self.job_list = []
     self.db = MysqlConn()
Beispiel #11
0
 def __init__(self):
     """Create the `MysqlConn` handle and keep it as `self.conn`."""
     self.conn = MysqlConn()
Beispiel #12
0
class MySql(object):
    """Query helpers for the `forum`, `thread` and `attach` tables."""

    def __init__(self):
        """Open the `MysqlConn` handle used by every helper."""
        self.conn = MysqlConn()

    def forum(self, platform):
        """Return the list of active (status 0) forums of `platform`.

        Each entry is a dict with the keys 'id', 'forum_url' (the url
        column with the type column appended when that is non-empty) and
        'forum_type' (the content_type column).
        """
        # NOTE(review): `platform` is interpolated into the SQL text; only
        # pass trusted values.
        str_sql = "select id,url,type,content_type from forum where platform='%s' and status =0 " % (
            platform)
        forum_list = []
        for forum_id, url, url_suffix, content_type in (self.conn.selectall(str_sql) or ()):
            forum_list.append({
                'id': forum_id,
                'forum_url': url + url_suffix if url_suffix else url,
                'forum_type': content_type,
            })
        return forum_list

    def thread(self, fid, url, title, content=''):
        """Insert a thread row and return the result of `query2`.

        The content column is only written when `content` is non-empty.
        """
        if content:
            str_sql = "insert into thread (fid,url,title,content) values(%s,%s,%s,%s)"
            str_tuple = (fid, url, title, content)
        else:
            str_sql = "insert into thread (fid,url,title) values(%s,%s,%s)"
            str_tuple = (fid, url, title)
        return self.conn.query2(str_sql, str_tuple)

    def thread_exist(self, fid, url):
        """Return True when at least one thread row has this fid and url."""
        # NOTE(review): `url` is interpolated into the SQL text; a quote in
        # it breaks the statement.
        str_sql = "select count(*) from thread where fid=%s and url='%s'" % (
            fid, url)
        count = self.conn.selectone(str_sql)
        # Any positive count means the thread exists; duplicates (count > 1)
        # must not fall through to an implicit None.
        return bool(count and count[0])

    def thread_kongbody(self, fid, url):
        """Insert a thread row with status 3 for `url`; always returns True."""
        str_sql = "insert into thread (fid,url,status) values(%s,%s,%s)"
        self.conn.query2(str_sql, (fid, url, 3))
        return True

    def attach(self, attach_list, tid):
        """Insert every URL of `attach_list` as an attachment of thread `tid`.

        When the insert reports 1062 (MySQL duplicate-entry error), the
        `count` column of the existing row is incremented instead.  Always
        returns True.
        """
        for attach_url in attach_list:
            # NOTE(review): the URL is interpolated into the SQL text; a
            # quote in it breaks the statement.
            str_sql = "insert into attach (url,tid) values('%s',%s)" % (
                attach_url, tid)
            if 1062 == self.conn.query(str_sql):
                # Only the duplicate case needs a second statement; running
                # the insert again after a success would just be rejected
                # (or would create a duplicate row).
                str_sql = "update attach set count = count +1 where url = '%s'" % (
                    attach_url)
                self.conn.query(str_sql)
        return True