def get(self, request):
    """Render the 'Feelings' page.

    Paginates the articles in the 'Feelings' sort six per page and
    supplies the five newest articles plus two random scenery images.
    """
    util = Util()
    try:
        sort = ArticleSortRecord.objects.filter(name='Feelings')[0]
    except IndexError:  # narrowed from a bare except: sort not created yet
        sort = None
    articles = ArticlesRecord.objects.order_by('-add_time')
    new_articles = articles[:5]
    articles = articles.filter(sort=sort)
    scenery_images = PhotoShowRecord.objects.filter(image_type='scenery')
    try:
        scenery_images = util.selectData(datas=scenery_images,
                                         data_len=scenery_images.count(),
                                         select_len=2)
    except Exception:  # best-effort: the page renders without images
        scenery_images = None
    page = request.GET.get('page', 1)
    p = Paginator(articles, 6, request=request)
    # BUG FIX: the original wrapped request.GET.get() in the
    # PageNotAnInteger guard, which it can never raise; p.page() is
    # the call that actually raises it.
    try:
        articles = p.page(page)
    except PageNotAnInteger:
        articles = p.page(1)
    return render(
        request, 'feelings.html', {
            'active': 'feelings',
            'articles': articles,
            'article_sort': sort,
            'scenery_images': scenery_images,
            'new_articles': new_articles
        })
def get(self, request):
    """Render the photo-wall page.

    Picks up to 12 random 'photos' records and tags each with a
    randomly chosen CSS shadow-colour class for the template.
    """
    util = Util()
    photo_shows = PhotoShowRecord.objects.filter(image_type='photos')
    colors = [
        'shadow-blue', 'shadow-yellow', 'shadow-orangered',
        'shadow-red', 'shadow-green'
    ]
    try:
        photo_shows = util.selectData(datas=photo_shows,
                                      data_len=photo_shows.count(),
                                      select_len=12)
        # random.choice replaces the manual randint(0, 4) index lookup.
        tmp_photo_shows = [{
            'photo_show': photo_show,
            'color': random.choice(colors)
        } for photo_show in photo_shows]
    except Exception:  # narrowed from a bare except; template tolerates None
        tmp_photo_shows = None
    return render(request, 'photos.html', {
        'tmp_photo_shows': tmp_photo_shows,
        'active': 'photos',
    })
def get_total_html(self, request):
    """Render the 'study' page.

    Lists every article sort except 'Feelings' as randomly coloured
    buttons, paginates the non-'Feelings' articles five per page, and
    supplies the five newest articles plus the about-me record.
    """
    util = Util()
    article_sorts = ArticleSortRecord.objects.all()
    articles = ArticlesRecord.objects.order_by('-add_time')
    try:
        about_me = AboutMeRecord.objects.get(id=1)
    except AboutMeRecord.DoesNotExist:  # narrowed from a bare except
        about_me = None
    new_articles = articles[:5]
    try:
        feelings_sort = article_sorts.get(name='Feelings')
    except ArticleSortRecord.DoesNotExist:
        feelings_sort = None
    article_sorts = article_sorts.filter(~Q(name='Feelings'))
    articles = articles.filter(~Q(sort=feelings_sort))
    colors = [
        'article-class-btn-color-1', 'article-class-btn-color-2',
        'article-class-btn-color-3', 'article-class-btn-color-4'
    ]
    sort_count = article_sorts.count()  # hoisted: each count() hits the DB
    article_sorts = util.selectData(datas=article_sorts,
                                    data_len=sort_count,
                                    select_len=sort_count)
    try:
        tmp_article_sorts = [{
            'article_sort': article_sort,
            'color': random.choice(colors)
        } for article_sort in article_sorts]
    except Exception:  # presumably selectData may return None — TODO confirm
        tmp_article_sorts = []
    page = request.GET.get('page', 1)
    p = Paginator(articles, 5, request=request)
    # BUG FIX: the PageNotAnInteger guard sat around request.GET.get(),
    # which never raises it; p.page() is what actually raises it.
    try:
        articles = p.page(page)
    except PageNotAnInteger:
        articles = p.page(1)
    return render(
        request, 'study.html', {
            'active': 'study',
            'tmp_article_sorts': tmp_article_sorts,
            'articles': articles,
            'about_me': about_me,
            'new_articles': new_articles
        })
def update():
    """Handle a cost-basis update request (Flask view).

    Validates the ``data`` field of the JSON body and, when valid,
    persists it to ``output_file``.

    Raises:
        AssertionError: when the payload fails validation (exception
            type kept so existing error handling keeps working).
    """
    u = Util()
    print("Updating cost basis")  # typo 'Uptating' fixed
    data = request.json
    print(data)  # single debug dump; the original printed it twice
    if not u.validate_input(data['data']):
        raise AssertionError("Invalid Data")
    print("data valid")
    u.update_file(output_file, data['data'])
    # BUG FIX: the original returned the SET literal {'Updated'}, which
    # Flask cannot serialise into a response (TypeError at runtime).
    # Return a JSON-serialisable dict instead.
    return {'status': 'Updated'}
def __init__(self):
    """Create a TimeParser with its interval length taken from config.

    Reads ``parser.milliseconds-interval`` from ``config/config.yml``
    when that file exists; otherwise falls back to the
    ``MILLISECONDS_INTERVAL`` environment variable. E.g. a value of
    10000 produces ten-second intervals in the video.
    """
    if isfile("config/config.yml"):
        raw_interval = Util().config["parser"]["milliseconds-interval"]
    else:
        raw_interval = getenv("MILLISECONDS_INTERVAL")
    self.interval_in_milliseconds = int(raw_interval)
def post(self, request):
    """Persist a valid guest message and return the refreshed message
    HTML; on an invalid form return the validation warnings instead."""
    send_message_form = SendMessageForm(request.POST)
    util = Util()
    if not send_message_form.is_valid():
        return util.get_message_warnings(request)
    send_message_form.save(commit=True)
    message_records = SendMessageRecord.objects.order_by('-add_time')
    # int(...) added for consistency with the comment handler, which
    # passes an int last_page; math.ceil(x / 4.0) yields a float.
    last_page = int(math.ceil(message_records.count() / 4.0))
    message_records = message_records[:4]
    return util.get_messages(request, message_records, last_page)
def get(self, request):
    """Render the guestbook page.

    Paginates messages four per page, picks one random 'from_others'
    writing and up to five random header images, and computes the
    ignore-page markers for the pagination widget.
    (A leftover debug print statement was removed.)
    """
    util = Util()
    send_messages = SendMessageRecord.objects.order_by('-add_time')
    writings = WritingsRecord.objects.filter(type='from_others')
    new_articles = ArticlesRecord.objects.order_by('-add_time')[:5]
    head_images = PhotoShowRecord.objects.filter(image_type='headImg')
    try:
        writings = util.selectData(datas=writings,
                                   data_len=writings.count(),
                                   select_len=1)[0]
    except Exception:  # narrowed from a bare except: best-effort value
        writings = None
    try:
        head_images = util.selectData(datas=head_images,
                                      data_len=head_images.count(),
                                      select_len=5)
    except Exception:
        head_images = None
    page = request.GET.get('page', 1)
    ignore_page_operation = IgnorePageOperation(
        int(page), int(math.ceil(send_messages.count() / 4.0)))
    pre_ignore_page, next_ignore_page = ignore_page_operation.getIgnorePage()
    p = Paginator(send_messages, 4, request=request)
    # BUG FIX: the PageNotAnInteger guard was around request.GET.get(),
    # which never raises it; p.page() is the call that raises it.
    try:
        send_messages = p.page(page)
    except PageNotAnInteger:
        send_messages = p.page(1)
    try:
        head_image = head_images[0]
    except (IndexError, TypeError):  # empty selection, or head_images is None
        head_image = None
    return render(
        request, 'message.html', {
            'active': 'message',
            'send_messages': send_messages,
            'writings': writings,
            'new_articles': new_articles,
            'head_images': head_images,
            'top_headImage': head_image,
            'pre_ignore_page': pre_ignore_page,
            'next_ignore_page': next_ignore_page
        })
def post(self, request):
    """Save a valid article comment and return the refreshed comment
    HTML for its article; on an invalid form return the validation
    warnings instead."""
    comment_form = SendCommentForm(request.POST)
    helper = Util()
    if not comment_form.is_valid():
        return helper.get_message_warnings(request)
    comment_form.save(commit=True)
    from_article_id = request.POST.get('from_article')
    article = ArticlesRecord.objects.get(id=from_article_id)
    records = SendCommentRecord.objects.filter(
        from_article=article).order_by('-add_time')
    # Three comments per page -> index of the last page.
    last_page = int(math.ceil(records.count() / 3.0))
    return helper.get_messages(request, records[:3], last_page)
def get(self, request):
    """Render the home page.

    Supplies five random banner images, one random scenery image for
    the writing panel, the five newest articles, the about-me record,
    the latest 'from_me' writing and all friend links.
    """
    util = Util()
    photo_shows = PhotoShowRecord.objects.filter(image_type='banner')
    photo_shows = util.selectData(datas=photo_shows,
                                  data_len=photo_shows.count(),
                                  select_len=5)
    writing_image = PhotoShowRecord.objects.filter(image_type='scenery')
    try:
        writing_image = util.selectData(datas=writing_image,
                                        data_len=writing_image.count(),
                                        select_len=1)[0]
    except Exception:  # narrowed from a bare except: no scenery image
        writing_image = None
    articles = ArticlesRecord.objects.order_by('-add_time')[:5]
    try:
        about_me = AboutMeRecord.objects.get(id=1)
    except AboutMeRecord.DoesNotExist:  # record may not exist yet
        about_me = None
    try:
        writing = WritingsRecord.objects.filter(type='from_me')
        writing = writing.order_by('-add_time')[0]
    except IndexError:  # no 'from_me' writings yet
        writing = None
    links = LinksRecord.objects.all()
    return render(
        request, 'index.html', {
            'active': 'home',
            'photo_shows': photo_shows,
            'articles': articles,
            'about_me': about_me,
            'writing': writing,
            'links': links,
            'writing_image': writing_image
        })
class ClientAbstract(object):
    """Shared base for clients that read input files and record
    which ones have been processed."""

    # Loaded once at import time; None when no local config file exists.
    config = Util().config if exists("config/config.yml") else None

    @staticmethod
    def read_files_from_directory(path):
        """Return the names of the regular files directly inside *path*."""
        return [entry for entry in listdir(path) if isfile(join(path, entry))]

    @staticmethod
    def read_file_from_directory(path):
        """Parse *path* as JSON; print a notice and return None when the
        file is not valid JSON."""
        try:
            with open(path) as handle:
                return json.load(handle)
        except ValueError:
            print("could not read from the json file")

    def write_status_file(self, file, path):
        """Append *file* (a filename) to the processing log *path*.

        The log directory comes from the config when present, otherwise
        from the FILE_PROCESSING_LOGS_DIR environment variable.
        """
        if self.config:
            base_dir = self.config["files"]["processed-directory"]
        else:
            base_dir = getenv("FILE_PROCESSING_LOGS_DIR")
        with open(join(base_dir, path), "a") as log:
            log.write(file + "\n")
def get(self, request):
    """Render the article-detail page.

    Shows the requested article (?id=, default 1) with its paginated
    comments (3/page), previous/next navigation, one random guest
    writing, four random sort buttons and random header images.
    ?sort= (default 0) keeps prev/next navigation inside the article's
    own sort. (The original's 'return' and 'render(...)' were split
    across two source lines; rejoined here.)
    """
    util = Util()
    # The original wrapped these GET lookups in try/except blocks, but
    # request.GET.get() never raises — the dead guards were removed.
    article_id = request.GET.get('id', 1)
    send_sort = request.GET.get('sort', 0)  # int 0 by default, str when supplied
    articles = ArticlesRecord.objects.order_by('-add_time')
    new_articles = articles[:5]
    article = articles.get(id=int(article_id))
    comments = SendCommentRecord.objects.filter(
        from_article=article).order_by('-add_time')
    writing = WritingsRecord.objects.filter(type='from_others')
    try:
        writing = util.selectData(datas=writing,
                                  data_len=writing.count(),
                                  select_len=1)[0]
    except Exception:  # narrowed from a bare except: best-effort value
        writing = None
    article_sorts = ArticleSortRecord.objects.all()
    feelings_sort = article_sorts.get(name='Feelings')
    article_sort = ArticleSortRecord.objects.get(name=article.sort)
    article_sorts = util.selectData(datas=article_sorts,
                                    data_len=article_sorts.count(),
                                    select_len=4)
    colors = [
        'btn article-class-btn-color-1', 'btn article-class-btn-color-2',
        'btn article-class-btn-color-3', 'btn article-class-btn-color-4'
    ]
    tmp_article_sorts = [{
        'article_sort': _sort,
        'color': random.choice(colors)
    } for _sort in article_sorts]
    # send_sort == 0 only matches the int default, i.e. no ?sort= given.
    if send_sort == 0 and article_sort.name != 'Feelings':
        articles = articles.filter(~Q(sort=feelings_sort))
        pre_article, next_article = self.get_pre_next(
            articles, article.add_time)
    else:
        pre_article, next_article = self.get_pre_next(
            articles.filter(sort=article_sort), article.add_time)
    page = request.GET.get('page', 1)
    ignore_page_operation = IgnorePageOperation(
        int(page), int(math.ceil(comments.count() / 3.0)))
    pre_ignore_page, next_ignore_page = ignore_page_operation.getIgnorePage()
    p = Paginator(comments, 3, request=request)
    # BUG FIX: the PageNotAnInteger guard belongs around p.page(), the
    # only call that raises it.
    try:
        comments = p.page(page)
    except PageNotAnInteger:
        comments = p.page(1)
    head_images = PhotoShowRecord.objects.filter(image_type='headImg')
    head_images = util.selectData(datas=head_images,
                                  data_len=head_images.count(),
                                  select_len=5)
    try:
        head_image = head_images[0]
    except (IndexError, TypeError):  # empty selection, or head_images is None
        head_image = None
    return render(
        request, 'article.html', {
            'active': 'study',
            'article': article,
            'comments': comments,
            'pre_article': pre_article,
            'next_article': next_article,
            'writing': writing,
            'tmp_article_sorts': tmp_article_sorts,
            'new_articles': new_articles,
            'send_sort': send_sort,
            'article_sort': article_sort,
            'not_feelings': article_sort.name != 'Feelings',
            'head_images': head_images,
            'first_image': head_image,
            'pre_ignore_page': pre_ignore_page,
            'next_ignore_page': next_ignore_page
        })
from utils.util import Util import random import sys import logging from datetime import datetime __UTIL = Util() __LOGGER = logging.getLogger("DataGeneratorLogger") __NAMES = __UTIL.load_base("names") __ADDRESS = __UTIL.load_base("address") __CITIES = __UTIL.load_base("city") def generate(cpf_lines): print( f"Starting Data Generator Application for {cpf_lines} records at {datetime.now()}" ) f = open("../../../output/generated.csv", "a+") header = "id;name;cpf;address;city;state;parent_name;parent_document;parent_city;parent_state;" f.write(header) for index in range(cpf_lines): if (index % 2 == 0): cpf = __UTIL.generate_valid_cpf() parent_cpf = __UTIL.generate_valid_cpf() else: cpf = __UTIL.generate_invalid_cpf()
def add_info_by_urls(urls): count = 0 total_count = 0 for i in urls: print '本次处理的网址是:' print i try: html = requests.get(i, headers=headers) except: print '处理失败,10分钟后重试' time.sleep(10) html = requests.get(i, headers=headers) soup = BeautifulSoup(html.text, 'html.parser') job_info = soup.find('div', 'info-primary') try: job_title = job_info.find('div', 'name').find('h1', '').text.encode( 'UTF-8') #获取工作title except: print 'ip被限制了,给你60s 手动解冻下' time.sleep(60) try: html = requests.get(i, headers=headers) except: print '处理失败,10分钟后重试' time.sleep(10) html = requests.get(i, headers=headers) soup = BeautifulSoup(html.text, 'html.parser') job_info = soup.find('div', 'info-primary') job_title = job_info.find('div', 'name').find('h1', '').text.encode( 'UTF-8') #获取工作title money = job_info.find('div', 'name').find('span', 'salary').text.encode( 'UTF-8') #获取工资 job_requirements = job_info.find('p').find_all( text=re.compile('.*')) # work_location = ''.join(job_requirements[0]).encode('UTF-8') #工作地点 work_experience = ''.join(job_requirements[1]).encode('UTF-8') #经验要求 education = ''.join(job_requirements[2]).encode('UTF-8') #学历要求 job_box = soup.find('div', 'job-box') jd = job_box.find('div', 'text').text.encode('UTF-8') #工作描述 try: company_name = job_box.find('div', 'detail-content').find( 'div', 'name').text.encode('UTF-8') #公司名称 company_type = ''.join( job_box.find('li', 'company-type').find_all( text=re.compile('.*'))[1]).encode('UTF-8') #公司类型 except: company_name_str = soup.title.text.encode('UTF-8') company_name = company_name_str.split('_')[1].split('-')[0] company_type = '未知' create_times = datetime.datetime.now().date() url = i util = Util() md5_str = job_title + company_name + str(url) md5 = util.get_md5(md5_str) sql = """ insert into boss_spider (job_title,company_name,company_type,money,work_location,work_experience,education,jd,url,create_times,md5) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s) on DUPLICATE KEY UPDATE param = %s; """ count_sql = """ select param from 
boss_spider where md5 = %s; """ dbutil = DBUtil(_ZECHN_DB) # print count_sql % md5 spider_count = dbutil.read_one(count_sql, md5) if spider_count == None: count += 1 print '有一个新岗位出现呦!' else: spider_count = spider_count[0] spider_count = int(spider_count) + 1 #重复爬取次数 params = [ job_title, company_name, company_type, money, work_location, work_experience, education, jd, url, create_times, md5, spider_count ] try: result = dbutil.execute(sql, params) except: print '有一条插入失败 url为:' print url total_count += 1 if (total_count % 17 == 0): print '每爬完17条具体职位数据后休息1s' time.sleep(1) print '正在处理第%d条职位数据' % total_count sleep_time = random.uniform(0, 1) # print '处理完成,睡眠%s秒,继续爬下一条数据' % sleep_time print '..............................................' # time.sleep(sleep_time) print 'success!一共发现了%d条新职位' % count