def select_one(condition={}, collection='ip_pool', db=DB):
    try:
        db = get_db(db)
        results = db[collection].find_one(condition)
        return results
    except Exception:
        # format_exc() returns the traceback as a string; print_exc() returns None
        utils.write_debug(utils.LINE(), "dealIp", traceback.format_exc())
def browser_click1(url, proxy):
    driver = None
    try:
        WIDTH = 1336
        HEIGHT = 750
        PIXEL_RATIO = 3.0
        UA = random.choice(userAgent.USERAGENT['google'] + userAgent.USERAGENT['ios'])
        mobileEmulation = {
            "deviceMetrics": {"width": WIDTH, "height": HEIGHT, "pixelRatio": PIXEL_RATIO},
            "userAgent": UA
        }
        options = webdriver.ChromeOptions()
        # proxy = int(proxy)
        # ip = utils.get_proxy(1, proxy)
        # utils.write_debug(utils.LINE(), 'browser', ip)
        # options.add_argument('--proxy-server=%s' % ip)
        options.add_argument("headless")
        options.add_argument("disable-gpu")
        options.add_experimental_option('mobileEmulation', mobileEmulation)
        driver = webdriver.Chrome(chrome_options=options)
        driver.set_page_load_timeout(20)
        driver.implicitly_wait(10)
        driver.get(url)
        time.sleep(30 + random.random() * 30)
        # el = WebDriverWait(driver, 60, 0.5).until(EC.presence_of_element_located(
        #     (By.XPATH, '//*[@id="bs-example-navbar-collapse-1"]/ul/li[3]/a')))
        # el.click()
        # time.sleep(random.random() * 5)
        driver.quit()
    except Exception:
        # driver stays None if webdriver.Chrome() itself failed
        if driver is not None:
            driver.quit()
        utils.write_debug(utils.LINE(), 'browser', traceback.format_exc())
def insert(datas, collection='ip_pool', db=DB):
    try:
        db = get_db(db)
        # for data in datas:
        #     data['create_at'] = time.strftime('%Y-%m-%d %H:%M:%S')
        # insert_many() returns an InsertManyResult carrying inserted_ids;
        # the deprecated insert() returned a bare list of ids and has no such attribute
        result = db[collection].insert_many(datas)
        return result.inserted_ids
    except Exception:
        utils.write_debug(utils.LINE(), "dealIp", traceback.format_exc())
def update(data, condition, collection='ip_pool', db=DB):
    try:
        db = get_db(db)
        # "$currentDate": {"lastModified": True}
        data['update_at'] = time.strftime('%Y-%m-%d %H:%M:%S')
        db[collection].update_many(condition, {"$set": data})
    except Exception:
        utils.write_debug(utils.LINE(), "dealIp", traceback.format_exc())
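# Illustrative round trip through the helpers above (not part of the original
# module). The sample address and the 'checked' field are assumptions; the
# default 'ip_pool' collection and DB constant come from the code above.
def _example_ip_roundtrip():
    ip = '1.2.3.4:8080'  # example value
    if not select_one({'_id': ip}):
        insert([{'_id': ip, 'create_at': time.strftime('%Y-%m-%d %H:%M:%S')}])
    update({'checked': 1}, {'_id': ip})
    return select_one({'_id': ip})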
def check_task():
    try:
        results = TaskCount.filter(task_status=0)
        for result in results:
            if result.finish_count >= result.task_count:
                result.task_status = 1
                md5 = result.url_md5
                result.save(update_fields=['task_status'])
                q = ClickTask.get(url_md5=md5)
                q.task_status = 2
                q.save(update_fields=['task_status'])
    except Exception:
        utils.write_debug(utils.LINE(), 'tasks', traceback.format_exc())


# @app.task
# def day_task():
#     # No volume control
#     today = datetime.date.today()
#     # Process queue
#     ts = []
#     # Find tasks whose status is not 2 and whose start time has arrived
#     try:
#         query_results = ClickTask.filter(task_status=0, task_detail=0, task_time=today)
#         for query_result in query_results:
#             url = query_result.url
#             md5 = query_result.url_md5
#             proxy = query_result.task_area
#             task_count_obj = TaskCount.get(url_md5=md5)
#             # Serialize the queryset
#             time_task_dic = json.loads(serializers.serialize('json', [task_count_obj]))[0]
#             count = time_task_dic['fields']['task_count']
#             # Spawning child processes inside celery raises:
#             # AssertionError: daemonic processes are not allowed to have children
#             process = multiprocessing.current_process()
#             # Mark the worker non-daemonic before starting children
#             process.daemon = False
#             p = multiprocessing.Process(target=browser.browser_click, args=(url, count, proxy, md5)) \
#                 if int(query_result.request) == 1 else \
#                 multiprocessing.Process(target=request.click, args=(url, count, proxy, md5))
#             p.start()
#             ts.append(p)
#             # Restore the daemon flag after starting
#             process.daemon = True
#             utils.write_debug(utils.LINE(), 'tasks',
#                               'start:' + md5 + ' time: ' + time.strftime('%X') + '\ncount:' + str(count))
#         # Flip the status
#         query_results.update(task_status=1)
#         for t in ts:
#             t.join()
#     except Exception:
#         utils.write_debug(utils.LINE(), 'tasks', traceback.format_exc())
def get_db(db=DB):
    try:
        # Establish the connection
        client = pymongo.MongoClient(IP, PORT)
        db_auth = client.admin
        # The account lives in the admin database: connect, authenticate, then switch
        db_auth.authenticate(USER, PASSWD)
        db = client[db]
        # TTL index: documents expire 30s after create_at
        client['ad_click']['ip_pool'].create_index(
            [("create_at", pymongo.ASCENDING)], expireAfterSeconds=30)
        return db
    except Exception:
        utils.write_debug(utils.LINE(), "dealIp", traceback.format_exc())
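# Note on the TTL index created above: MongoDB only expires documents whose
# indexed field is a BSON date, so the string timestamps written by
# time.strftime() elsewhere in this module are never purged. A minimal sketch
# of an insert the 30-second TTL would actually act on (assumes DB == 'ad_click'
# and that the datetime module is available; the sample _id is illustrative):
def _example_ttl_insert():
    import datetime
    db = get_db()
    db['ip_pool'].insert_one({
        '_id': '1.2.3.4:8080',
        'create_at': datetime.datetime.utcnow(),  # BSON date -> TTL applies
    })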
def click1(url, proxy):
    try:
        ip = utils.get_proxy(1, proxy)
        utils.write_debug(utils.LINE(), 'request', ip)
        proxies = {"http": "http://" + ip, "https": "https://" + ip}
        UA = random.choice(userAgent.USERAGENT['pc'])
        headers = {"User-Agent": UA}
        requests.adapters.DEFAULT_RETRIES = 5
        session = requests.session()
        session.keep_alive = False
        session.get(url, proxies=proxies, headers=headers)
        time.sleep(30 + random.random() * 30)
    except Exception:
        utils.write_debug(utils.LINE(), 'request', traceback.format_exc())
def hour_task():
    # Volume-controlled: look up tasks every hour
    print('start')
    now = datetime.datetime.now()
    hour = 'field_' + str(now.hour)
    # Process queue
    ts = []
    # Find tasks whose status is not 2 and whose start time has arrived
    try:
        query_results = ClickTask.filter(task_status__in=[0, 1], task_time=datetime.date.today())
        for query_result in query_results:
            # url = query_result.url
            md5 = query_result.url_md5
            # proxy = query_result.task_area
            time_task_obj = TimeTask.get(url_md5=md5)
            # Serialize the queryset
            time_task_dic = json.loads(serializers.serialize('json', [time_task_obj]))[0]
            count = time_task_dic['fields'][hour]
            params = json.loads(serializers.serialize('json', [query_result]))[0]['fields']
            params['count'] = count
            # Spawning child processes inside celery raises:
            # AssertionError: daemonic processes are not allowed to have children
            process = multiprocessing.current_process()
            # Mark the worker non-daemonic before starting children
            process.daemon = False
            p = multiprocessing.Process(target=browser.browser_click, args=(params,)) \
                if int(query_result.request) == 1 else \
                multiprocessing.Process(target=request.click, args=(params,))
            p.start()
            ts.append(p)
            # Restore the daemon flag after starting
            process.daemon = True
            utils.write_debug(
                utils.LINE(), 'tasks',
                'start:' + md5 + ' time: ' + time.strftime('%X') + '\ncount:' + str(count))
        # Flip the status
        query_results.update(task_status=1)
        for t in ts:
            t.join()
    except Exception:
        utils.write_debug(utils.LINE(), 'tasks', traceback.format_exc())
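# Hedged sketch of wiring hour_task into Celery beat, kept commented out like
# the legacy day_task above. The Celery app, broker URL, and module path
# 'tasks' are assumptions, not taken from this file:
#
# from celery import Celery
# from celery.schedules import crontab
#
# app = Celery('tasks', broker='redis://localhost:6379/0')
# app.conf.beat_schedule = {
#     'hourly-click-task': {
#         'task': 'tasks.hour_task',
#         'schedule': crontab(minute=0),  # top of every hour
#     },
# }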
def click1(url):
    try:
        ip = utils.get_proxy(1)
        utils.write_debug(utils.LINE(), 'request', ip)
        proxies = {"http": "http://" + ip, "https": "https://" + ip}
        UA = random.choice(userAgent.USERAGENT['pc'])
        headers = {"User-Agent": UA, "Referer": "http://www.ebrun.com/retail/b2c/"}
        requests.adapters.DEFAULT_RETRIES = 5
        session = requests.session()
        session.keep_alive = False
        r = session.get(url, proxies=proxies, headers=headers)
        time.sleep(random.random() * 3)
        # Follow a random 'buy' link from the landing page
        soup = BeautifulSoup(r.text, 'lxml')
        links = soup.find_all('a', class_='buyEm')
        link = random.choice(links)
        url = link.get('href')
        print(url)
        session.get(url, proxies=proxies, headers=headers)
        time.sleep(random.random() * 30)
    except Exception:
        utils.write_debug(utils.LINE(), 'request', traceback.format_exc())
def dosql(request):
    try:
        url = request.GET.get('url')
        task_detail = request.GET.get('task_detail')
        task_time = request.GET.get('task_time')
        if task_time:
            # '%Y-%m-%d' (the original '%Y-%mm-%d' never matches dates like 2018-06-01)
            task_time = datetime.datetime.strptime(task_time, '%Y-%m-%d')
            if task_time < datetime.datetime.now():
                return HttpResponse('Task creation failed, please check the task start time!')
        else:
            task_time = datetime.date.today() + datetime.timedelta(days=1)
        md5 = hashlib.md5()
        md5.update((url + str(time.time())).encode('utf8'))
        MD5 = md5.hexdigest()
        app_clicktask = {
            'url': url,
            'url_md5': MD5,
            'task_detail': task_detail,
            'task_time': task_time
        }
        ClickTask.create(**app_clicktask)
        if int(task_detail) == 0:
            task_count = request.GET.get('task_count')
            app_taskcount = {'url_md5': MD5, 'task_count': task_count}
            TaskCount.create(**app_taskcount)
        else:
            app_timetask = {'url_md5': MD5}
            task_count = 0
            for i in range(24):
                app_timetask['field_' + str(i)] = request.GET.get(str(i))
                task_count += int(request.GET.get(str(i)))
            TimeTask.create(**app_timetask)
            app_taskcount = {'url_md5': MD5, 'task_count': task_count}
            TaskCount.create(**app_taskcount)
        return HttpResponse('Task created successfully, your task id is: ' + MD5)
    except Exception as e:
        utils.write_debug(utils.LINE(), 'dosql', traceback.format_exc())
        return HttpResponse('Task creation failed\n' + str(e))
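# Illustrative calls to dosql() (the endpoint path and host are assumptions;
# the parameter names come from the view above):
#
# A fixed-count task:
#   requests.get('http://localhost:8000/dosql', params={
#       'url': 'http://example.com', 'task_detail': '0',
#       'task_time': '2018-06-01', 'task_count': '100'})
#
# An hour-by-hour task sends task_detail='1' plus one count per hour under
# the keys '0'..'23'.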
def dealIp():
    m = 0
    try:
        datas = {}
        ips = utils.get_proxy(10)
        n = len(ips.split('\r\n'))
        for data in ips.split('\r\n'):
            datas['_id'] = data
            # Skip proxies already in the pool
            if not select_one({'_id': data}, 'ip_pool'):
                insert([datas], 'ip_pool')
                m += 1
            datas = {}
        utils.write_debug(utils.LINE(), "dealIp", 'total ip num is {}'.format(n))
        utils.write_debug(utils.LINE(), "dealIp", 'success ip num is {}'.format(m))
    except Exception:
        utils.write_debug(utils.LINE(), "dealIp", traceback.format_exc())
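# Assumed contract for utils.get_proxy(n), inferred from the split('\r\n')
# above: n 'ip:port' strings joined by CRLF. A stub with that shape for local
# testing (the name and addresses are illustrative, not part of utils):
def _fake_get_proxy(n):
    return '\r\n'.join('10.0.0.{}:8080'.format(i) for i in range(1, n + 1))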
def CPM(request):
    try:
        # Fetch the request data
        task_area = request.POST.get('task_area')
        flow_demand = request.POST.get('flow_demand')
        referer = request.POST.get('referer')
        remaintime = request.POST.get('remaintime')
        url = request.POST.get('url')
        tfip = request.POST.get('tfip')
        pvbs = request.POST.get('pvbs')
        is_control = request.POST.get('is_control')
        task_time = request.POST.get('task_time')
        remark = request.POST.get('remark')
        user_id = request.POST.get('user_id')
        # Compute and validate
        md5 = hashlib.md5()
        md5.update((url + str(time.time())).encode('utf8'))
        url_md5 = md5.hexdigest()
        if tfip.isdigit():
            tfip = int(tfip)
        else:
            raise Exception(u"Please enter a number for the delivery IP count")
        # click_task
        if task_time:
            task_time = datetime.datetime.strptime(task_time, '%Y-%m-%d')
            if task_time < datetime.datetime.now():
                raise Exception(u'Task date is earlier than the current time, please check!')
        else:
            task_time = datetime.date.today() + datetime.timedelta(days=1)
        click_task = {
            'url': url,
            'url_md5': url_md5,
            'task_time': task_time,
            'task_area': task_area,
            'pv': pvbs,
            'user_id': user_id
        }
        if int(flow_demand) == 0 or int(flow_demand) == 1:
            if int(flow_demand) == 1:
                if remaintime.isdigit():
                    click_task['remaintime'] = remaintime
                if referer:
                    click_task['referer'] = referer
            click_task['task_detail'] = 0
        else:
            if int(flow_demand) == 3:
                if remaintime:
                    click_task['remaintime'] = remaintime
                if referer:
                    click_task['referer'] = referer
            click_task['task_detail'] = 1
        click_task['remark'] = remark if remark else u'none'
        task_id = ClickTask.objects.create(**click_task).id
        # task_count
        task_count = {
            'url_md5': url_md5,
            'task_count': tfip,
            'task_id_id': task_id
        }
        TaskCount.objects.create(**task_count)
        # time_task
        time_task = {'url_md5': url_md5, 'task_id_id': task_id}
        if int(is_control) == 0:
            # Spread the volume evenly across the 24 hours
            count = int(tfip / 24)
            for i in range(24):
                time_task['field_' + str(i)] = count
        elif int(is_control) == 1:
            for i in range(24):
                time_task['field_' + str(i)] = request.POST.get(str(i))
        TimeTask.objects.create(**time_task)
        # business_cpmwork
        cpmwork = {
            'user_name_id': user_id,
            'task_id_id': task_id,
            'url': url,
            'click_nums': tfip,
            'ip_nums': 0,
            'pv_nums': 0,
            'status': 0,
            'is_control': is_control,
            'remark': remark,
            'task_time': task_time
        }
        CPMWork.objects.create(**cpmwork)
        # cpmwork['task_time'] = time.strftime('%Y-%m-%d %X', time.localtime())
        return render(request, 'CPMWork.html', {'cpmworks': [cpmwork]})
        # return HttpResponse(click_task)
    except Exception as e:
        utils.write_debug(utils.LINE(), 'business/views', traceback.format_exc())
        return HttpResponse(e)
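# Illustrative POST to CPM() (endpoint path and host are assumptions; the
# field names come from the view above). An evenly spread 240-IP task:
#
#   requests.post('http://localhost:8000/CPM', data={
#       'url': 'http://example.com', 'task_area': '0', 'flow_demand': '0',
#       'referer': '', 'remaintime': '30', 'tfip': '240', 'pvbs': '1',
#       'is_control': '0', 'task_time': '2018-06-01', 'remark': '',
#       'user_id': '1'})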
def browser_click1(url, proxy, referer, remaintime, task_detail, pv, width, height,
                   is_random, sectime, securl, element, task_type):
    driver = None
    try:
        WIDTH = width
        HEIGHT = height
        PIXEL_RATIO = 3.0
        if task_detail == 0:
            UA = random.choice(userAgent.USERAGENT['pc'])
        elif task_detail == 1:
            UA = random.choice(userAgent.USERAGENT['google'] + userAgent.USERAGENT['ios'])
        elif task_detail == 2:
            UA = random.choice(userAgent.USERAGENT['ios'])
        else:
            UA = random.choice(userAgent.USERAGENT['google'])
        mobileEmulation = {
            "deviceMetrics": {"width": WIDTH, "height": HEIGHT, "pixelRatio": PIXEL_RATIO},
            "userAgent": UA
        }
        if referer:
            referer = random.choice(referer.split(';'))
        options = webdriver.ChromeOptions()
        # 9999 is the sentinel for "no proxy"
        if int(proxy) != 9999:
            proxy = int(proxy)
            ip = utils.get_proxy(proxy)
            utils.write_debug(utils.LINE(), 'browser', ip)
            options.add_argument('--proxy-server=%s' % ip)
        options.add_argument("headless")
        options.add_argument("disable-gpu")
        options.add_experimental_option('mobileEmulation', mobileEmulation)
        driver = webdriver.Chrome(chrome_options=options)
        driver.set_page_load_timeout(60)
        driver.implicitly_wait(60)
        driver.get(url)
        # Open pv-1 extra tabs on the same URL for additional page views
        for i in range(pv - 1):
            js = 'window.open("{}");'.format(url)
            driver.execute_script(js)
        time.sleep(random.uniform(remaintime - 3, remaintime + 3))
        '''
        CPC
        '''
        if task_type == 1:
            if is_random == 1:
                # Follow a random link from the landing page
                urls = BeautifulSoup(driver.page_source, 'lxml').find_all('a')
                url = random.choice(urls).get('href')
                driver.get(url)
                time.sleep(sectime)
            else:
                if securl:
                    # e.g. <a class="swf-top" href="http://tv.sohu.com/s2015/dsjwxfs" target="_blank"></a>
                    urls = securl.split(';')
                    url = random.choice(urls)
                    print(url)
                    driver.get(url)
                    time.sleep(sectime)
                # soup.findAll('a', {'class': 'lb'})
                # Out[14]: [<a class="lb" href="http://www.baidu.com/bdorz/login.gif?login&tpl=mn&u=http%3A%2F%2Fwww.baidu.com%2f%3fbdorz_come%3d1" name="tj_login">登录</a>]
                elif element:
                    element_key = element.split(':')[0]
                    element_value = element.split(':')[1]
                    urls = BeautifulSoup(driver.page_source, 'lxml').findAll(
                        'a', {element_key: element_value})
                    # Take the href, not the Tag itself
                    url = random.choice(urls).get('href')
                    driver.get(url)
                    time.sleep(sectime)
        # el = WebDriverWait(driver, 60, 0.5).until(EC.presence_of_element_located(
        #     (By.XPATH, '//*[@id="bs-example-navbar-collapse-1"]/ul/li[3]/a')))
        # el.click()
        # time.sleep(random.random() * 5)
        print(driver.title)
        driver.get('about:blank')
        driver.close()
        driver.quit()
    except Exception as e:
        print(e)
        # driver stays None if webdriver.Chrome() itself failed
        if driver is not None:
            driver.close()
            driver.quit()
        utils.write_debug(utils.LINE(), 'browser', traceback.format_exc())
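# Illustrative call (every value is an example assumption): mobile UA mix
# (task_detail=1), no proxy (the 9999 sentinel), 2 page views held ~30s, and
# CPC-style random link follow-through for 5s:
#
#   browser_click1('http://example.com', 9999, '', 30, 1, 2,
#                  375, 667, 1, 5, '', '', 1)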