def create_file(user_id, folder_id, name, text):
    # Make sure user and folder exist
    user = User.objects.filter(id=user_id).first()
    folder = Folder.objects.filter(id=folder_id).first()
    if not user or not folder:
        return None
    # Ensure that the input folder is owned by the input user
    if folder.owner.id != user.id:
        return None
    # Validate that a file with this name does not already exist
    # (the loop variable no longer shadows `folder`)
    owned_file_names = [f.name for f in folder.owned_files.all()]
    if name in owned_file_names:
        return None
    # Create the new file and its initial content atomically
    with transaction.atomic():
        new_file = File(name=name, belong=folder)
        new_file.save()
        new_content = Content(text=text, version=0, file=new_file)
        new_content.save()
    # Serialize new file and content to JSON
    return FileRepo.to_json(new_file, new_content)
def video():
    imageDirPath = "app/static/images"
    images = []
    if os.path.exists(imageDirPath):
        images = [
            image for image in os.listdir(imageDirPath)
            if image.endswith(".jpg")
        ]
    # Query the latest content once; fall back to an empty Content
    latest = Content().latest()
    content = latest if latest else Content()
    return render_template('main/video.html', images=images, data=dict(content))
def new_order():
    newdict = {}
    if request.method == 'POST':
        data = request.form.to_dict()
        # Keep only the fields the user actually filled in
        for key in data:
            if data[key] != '':
                newdict[key] = data[key]
        if not newdict:
            flash('Your order is empty.', 'info')
            return redirect(url_for('main.new_order'))
        order = Order(author=current_user)
        db.session.add(order)
        db.session.commit()
        # Each checked item arrives as '<item_id>': 'on', with an optional
        # '<item_id>-quantity' field; default the quantity to 1
        for key in newdict:
            if newdict[key] == 'on':
                temp = key + "-quantity"
                quantity = 1 if temp not in newdict else int(newdict[temp])
                content = Content(cart=order, item_id=int(key),
                                  quantity=quantity)
                db.session.add(content)
                db.session.commit()
        return redirect(url_for('main.home'))
    items = Item.query.order_by(collate(Item.name, 'NOCASE')).all()
    return render_template('new_order.html', items=items)
async def get_content(
    params: PaginationParams = Depends(),
    type: Optional[str] = Query(
        None, description="Either ``'Movie'`` or ``'Show'``."),
    rating: Optional[str] = Query(
        None,
        description="Valid MPAA or TV Parental Guidelines Monitoring Board rating."),
    order_by: Optional[str] = Query(
        None,
        description='Accepts a model attribute. Prefix with `-` for descending.'),
):
    content = Content.all()
    if type is not None and type.upper() in ContentType._member_names_:
        content = content.filter(type=type.capitalize())
    elif type is not None:
        raise_exception(detail=f'{type} is not a valid option for `type`.')
    if rating is not None and _snake_case(rating).upper() in Rating._member_names_:
        content = content.filter(rating=rating.upper())
    elif rating is not None:
        raise_exception(detail=f'{rating} is not a valid option for `rating`.')
    # Strip a leading `-` (descending) before validating the attribute, and
    # keep the reordered queryset rather than discarding it
    if order_by is not None and order_by.lstrip('-') in ContentOutGet.schema()['properties']:
        content = content.order_by(order_by)
    elif order_by is not None:
        raise_exception(
            detail=f'{order_by} is not an attribute that can be ordered by.')
    return paginate(await ContentOutGet.from_queryset(content), params)
def test_cms_update_content(self):
    content = Content(bio="unique_data")
    db.session.add(content)
    db.session.commit()
    response = self.testapp.get("/bio", follow_redirects=True)
    self.assertEqual(200, response.status_code)
    self.assertTrue(b"unique_data" in response.data)
def setColumnContent():
    name = request.values.get('name')
    subtitle = request.values.get('subtitle', default=None)
    content_details = request.values.get('content')
    time = getCurrentDateTime()
    phone = current_user.get_id()
    photo = savePhotoRequestFile('photo')
    video = saveVideoRequestFile('video')
    audio = saveAudioRequestFile('audio')
    visition = request.values.get('visition', type=int)
    view_type = request.values.get('viewType', default=0)
    rich_text = request.values.get('rich_text', default=None)
    type = request.values.get('type')
    father_id = request.values.get('fatherID')
    column_id = request.values.get('columnID')
    # A fatherID of "-1" means the content has no parent
    if father_id == "-1":
        father_id = None
    content = Content(name=name, subtitle=subtitle,
                      content_details=content_details, time=time, phone=phone,
                      photo=photo, video=video, audio=audio, visition=visition,
                      view_type=view_type, rich_text=rich_text, type=type,
                      father_id=father_id, column_id=column_id, live=0)
    db.session.add(content)
    db.session.commit()
    # "提交成功" means "submission successful"
    return json.dumps(sendData(True, "提交成功", 'OK'))
def show_content(content_id):
    content_json = request.get_json()
    content = Content.get_item_by_id(content_json['id'])
    content.links_data = str(content_json['links_data'])
    content.blocks_data = str(content_json['blocks_data'])
    content.add_itself()
    return str(content.id)
def cms():
    form = ContentUpdateForm()
    if form.validate_on_submit():
        content = Content(bio=form.bio.data, releases=form.releases.data,
                          podcasts=form.podcasts.data, videos=form.videos.data,
                          contact=form.contact.data,
                          impressum=form.impressum.data)
        db.session.add(content)
        db.session.commit()
        flash('Changes updated successfully.')
        return redirect(url_for('main.cms'))
    # Pre-fill the form from the latest content row, if one exists yet
    try:
        latest = Content().latest()
        form.bio.data = latest.bio
        form.releases.data = latest.releases
        form.podcasts.data = latest.podcasts
        form.videos.data = latest.videos
        form.contact.data = latest.contact
        form.impressum.data = latest.impressum
    except Exception:
        pass
    return render_template('main/cms.html', form=form)
def test_impressum(self):
    response = self.testapp.get("/impressum", follow_redirects=True)
    self.assertEqual(200, response.status_code)
    self.assertTrue(b"::impressum()" in response.data)
    db.session.add(Content(impressum="unique_data_impressum"))
    db.session.commit()
    response = self.testapp.get("/impressum", follow_redirects=True)
    self.assertTrue(b"unique_data_impressum" in response.data)
def test_bio(self):
    response = self.testapp.get("/bio", follow_redirects=True)
    self.assertEqual(200, response.status_code)
    self.assertTrue(b"::bio()" in response.data)
    db.session.add(Content(bio="unique_data_bio"))
    db.session.commit()
    response = self.testapp.get("/bio", follow_redirects=True)
    self.assertTrue(b"unique_data_bio" in response.data)
def test_contact(self):
    response = self.testapp.get("/contact", follow_redirects=True)
    self.assertEqual(200, response.status_code)
    self.assertTrue(b"::contact()" in response.data)
    db.session.add(Content(contact="unique_contact_data"))
    db.session.commit()
    response = self.testapp.get("/contact", follow_redirects=True)
    self.assertTrue(b"unique_contact_data" in response.data)
def publish_new_version(user_id, file_id, text):
    fo = File.objects.filter(id=file_id, belong__owner__id=user_id).first()
    if not fo:
        return None
    # Grab the latest version of the file content currently stored in the database
    co = fo.file_content.latest('version')
    if not co:
        # Should never happen: every file is created with an initial version
        return None
    # Create a new content object with the input text and file as foreign key
    new_co = Content(text=text, version=co.version + 1, file=fo)
    # Save the new version of the file content to the database
    new_co.save()
    # Return the updated file
    return FileRepo.to_json(fo, new_co)
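Two concurrent publishes could both read the same latest version above and write duplicate version numbers. A minimal sketch of a lock-based variant, assuming the same File/Content/FileRepo objects; the function name and the use of select_for_update are illustrative, not from the original source:

from django.db import transaction

def publish_new_version_locked(user_id, file_id, text):
    # Hypothetical variant: lock the file row so concurrent publishes
    # serialize the read-increment-write of `version`
    with transaction.atomic():
        fo = (File.objects.select_for_update()
              .filter(id=file_id, belong__owner__id=user_id)
              .first())
        if not fo:
            return None
        latest = fo.file_content.order_by('-version').first()
        next_version = latest.version + 1 if latest else 0
        new_co = Content(text=text, version=next_version, file=fo)
        new_co.save()
    return FileRepo.to_json(fo, new_co)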
def publish(content_id):
    title = escape(request.form["title"])
    body = escape(request.form["body"])
    if title == "" or body == "":
        abort(404)
    if content_id == "new":
        content = Content(title=title, body=body, user_id=current_user.id)
        db.session.add(content)
        db.session.commit()
        return redirect(url_for("index"))
    # Editing an existing post: it must exist and belong to the current user
    content = Content.query.filter_by(id=content_id).first()
    if content is not None and content.user_id == current_user.id:
        content.title = title
        content.body = body
        db.session.commit()
        return redirect(url_for("index"))
    abort(404)
def index(content_id):
    content = Content.get_item_by_id(content_id)
    if content is None:
        # Lazily create an empty Content row for this id on first visit
        content = Content()
        content.id = content_id
        content.add_itself()
        content = Content.get_item_by_id(content_id)
    # Drop SQLAlchemy's internal state before handing the dict to the template
    content.__dict__.pop('_sa_instance_state', None)
    return render_template('index.html', content=content.__dict__)
def post_content(tt_code):
    content = db.session.query(Content).filter(Content.id == tt_code).first()
    if content is not None:
        return abort(409)
    payload = request.get_json()
    company_names = payload["company_names"]
    content = Content()
    content.id = tt_code
    for company_name in company_names:
        company = db.session.query(Company).filter(
            Company.name == company_name).first()
        if company is None:
            return abort(404, f"Company {company_name} is not known")
        content.companies.append(company)
    db.session.add(content)
    db.session.commit()
    return make_response("CREATED", 201)
def get_all_content():
    """Retrieve all content."""
    page = request.args.get('page', 1, type=int)
    per_page = min(request.args.get('per_page', 30, type=int), 100)
    text = request.args.get('text', False, type=bool)
    data = Content.content_to_collection_dict(
        Content.query
        .filter(Content.status)
        .order_by(Content.phase.asc(), Content.section.asc()),
        page, per_page, 'api.v1.get_all_content', text=text)
    return jsonify(data)
def create_content():
    """Create a new content section."""
    if g.current_user.group not in ['admin']:
        abort(403)
    data = request.get_json() or {}
    # Add the content section
    content = Content()
    content.from_dict(data, new=True)
    try:
        db.session.add(content)
        db.session.commit()
    except exc.IntegrityError:
        db.session.rollback()
        return '', 400
    response = jsonify(content.to_dict())
    response.status_code = 201
    response.headers['Location'] = url_for('api.v1.get_content',
                                           public_id=content.public_id)
    return response
def test_audio(self):
    response = self.testapp.get("/audio", follow_redirects=True)
    self.assertEqual(200, response.status_code)
    self.assertTrue(b"::audio()" in response.data)
    db.session.add(Content(releases="unique_data_release",
                           podcasts="unique_data_podcast"))
    db.session.commit()
    response = self.testapp.get("/audio", follow_redirects=True)
    self.assertTrue(b"unique_data_release" in response.data)
    self.assertTrue(b"unique_data_podcast" in response.data)
def bbc_sport():
    page = requests.get('https://www.bbc.com/sport')
    soup = BeautifulSoup(page.content, 'html.parser')
    anchors = soup.find_all('a', {'class': 'gs-c-promo-heading'})
    # Skip the first promo and keep the next eight headlines; titles and
    # links now cover the same index range
    titles = [anchors[i].text for i in range(1, 9)]
    links = ["https://www.bbc.com" + anchors[i]['href'] for i in range(1, 9)]
    for i in range(8):
        con = Content(page='bbc', link=links[i], title=titles[i],
                      time=datetime.now(), type='sport')
        db.session.add(con)
    db.session.commit()
def post():
    form = markdownform()
    if form.validate_on_submit():
        try:
            title = request.form['title']
            content = request.form['pagedown']
            sub = Content(title=title, content=content, author=current_user)
            db.session.add(sub)
            db.session.commit()
            return redirect('/')
        except Exception:
            return 'There was a problem submitting your post.'
    return render_template('post.html', form=form)
def test_latest(self):
    content_old = Content(bio="a", releases="b", podcasts="c",
                          videos="d", contact="e", impressum="f")
    content_new = Content(bio="aa", releases="bb", podcasts="cc",
                          videos="dd", contact="ee", impressum="ff")
    db.session.add(content_old)
    db.session.add(content_new)
    db.session.commit()
    latest = Content().latest()
    self.assertEqual("aa", latest.bio)
    self.assertEqual("bb", latest.releases)
    self.assertEqual("cc", latest.podcasts)
    self.assertEqual("dd", latest.videos)
    self.assertEqual("ee", latest.contact)
    self.assertEqual("ff", latest.impressum)
def listing(id):
    p = Produce.query.all()
    s = Supplier.query.all()
    listing = Listing.query.get(id)
    form = ContentForm()
    if form.validate_on_submit():
        content = Content(quantity=form.quantity.data,
                          cart=current_user, list=listing)
        db.session.add(content)
        db.session.commit()
        return redirect(url_for('browse'))
    return render_template('listing.html', listing=listing, produce=p,
                           supplier=s, title='Listing', form=form)
def test_dict_conversion(self):
    content = Content(bio="aaa", releases="bbb", podcasts="ccc",
                      videos="ddd", contact="eee", impressum="fff")
    try:
        content_dict = dict(content)['content']
        self.assertEqual("aaa", content_dict['bio'])
        self.assertEqual("bbb", content_dict['releases'])
        self.assertEqual("ccc", content_dict['podcasts'])
        self.assertEqual("ddd", content_dict['videos'])
        self.assertEqual("eee", content_dict['contact'])
        self.assertEqual("fff", content_dict['impressum'])
    except Exception:
        self.fail("dict conversion failed")
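The tests above pin down an implied interface: Content().latest() returns the most recent row, and dict(content) yields a {'content': {...}} mapping. A minimal Flask-SQLAlchemy sketch of a model satisfying that interface; the column types and id-based ordering are assumptions for illustration, not the original model:

class Content(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    bio = db.Column(db.Text)
    releases = db.Column(db.Text)
    podcasts = db.Column(db.Text)
    videos = db.Column(db.Text)
    contact = db.Column(db.Text)
    impressum = db.Column(db.Text)

    def latest(self):
        # Assumes the highest id is the most recently inserted row
        return Content.query.order_by(Content.id.desc()).first()

    def __iter__(self):
        # dict(content) consumes these (key, value) pairs,
        # producing {'content': {...}}
        yield 'content', {
            'bio': self.bio, 'releases': self.releases,
            'podcasts': self.podcasts, 'videos': self.videos,
            'contact': self.contact, 'impressum': self.impressum,
        }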
def upload_file():
    if request.method == 'POST':
        url = request.form.get('url')
        account_id = request.form.get('account')
        caption = request.form.get('caption')
        # Check that the POST request actually carries a file part or a URL
        if 'file' not in request.files and url == '':
            return redirect(request.url)
        file = request.files.get('file')
        # If the user does not select a file, the browser may submit an
        # empty part without a filename
        if file is None or file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            fs = secure_filename(file.filename)
            ext = fs.split(".")[-1]
            # Store under a random name to avoid collisions
            fn = "{}.{}".format(str(uuid.uuid4()), ext)
            path = os.path.abspath(
                os.path.join(app.config['UPLOAD_FOLDER'], fn))
            file_url = "{}/{}".format(cfg.UPLOAD_URL, fn)
            file.save(path)
            type = "photo"
            thumbnail = None
            if ext == 'mp4':
                type = "video"
            content = Content(insta_account_id=account_id, caption=caption,
                              url=file_url, path=path, type=type,
                              thumbnail=thumbnail)
            db.session.add(content)
            db.session.commit()
    return redirect(url_for('view_contents'))
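upload_file calls an allowed_file helper that is not shown here. A minimal sketch in the style of the common Flask upload pattern; the ALLOWED_EXTENSIONS set is an assumed example, not taken from this codebase:

ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'mp4'}  # assumption

def allowed_file(filename):
    # Accept only filenames with an extension from the allow-list
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS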
def gazeta_sport():
    page = requests.get("https://www.gazeta.pl/0,0.html")
    soup = BeautifulSoup(page.content, 'html.parser')
    # The second simple-news-list section holds the sport headlines
    clas = soup.find_all('section', class_='o-section__simple-news-list')[1]
    a = clas.find_all('a')
    start = app.config['START_INDEX_GAZETA']
    titles = [a[i].get('title') for i in range(start, start + 10)]
    links = [a[i].get('href') for i in range(start, start + 10)]
    for i in range(10):
        con = Content(page='gazeta', link=links[i], title=titles[i],
                      time=datetime.now(), type='sport')
        db.session.add(con)
    db.session.commit()
def interia():
    page = requests.get('https://www.interia.pl/')
    soup = BeautifulSoup(page.content, 'html.parser')
    clas = soup.find('section', class_='news-section')
    a = clas.find_all('a')
    start = app.config['START_INDEX_INTERIA']
    length = app.config["LENGTH"]
    titles = [a[i].get('title') for i in range(start, start + length)]
    links = [a[i].get('href') for i in range(start, start + length)]
    # Iterate over the configured length rather than a hard-coded 10
    for i in range(length):
        con = Content(page='interia', link=links[i], title=titles[i],
                      time=datetime.now(), type='news')
        db.session.add(con)
    db.session.commit()
def Add_Data_To_Url(url):
    try:
        ip = get_host(url)
        if ip == '获取失败':  # get_host returns '获取失败' ("lookup failed") on error
            return
        test_url = list(URL.objects.filter(url=url))
        # If the URL is already in the index table, exit immediately
        if test_url != []:
            return
        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            # If the web-asset table lacks this record, add it
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                try:
                    res_title = pymysql.escape_string(res.get('title'))
                except Exception as e:
                    res_title = 'Error'
                    Except_Log(stat=11, url=url + '|网页内容转码失败', error=str(e))
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                try:
                    Other_Url.objects.create(url=res_url, title=res_title,
                                             power=res_power, server=res_server,
                                             status=res_status, ip=res_ip)
                except Exception as e:
                    Except_Log(stat=17, url=url + '|标题等信息编码不符合', error=str(e))
                    Other_Url.objects.create(url=res_url, title='Error',
                                             power='Error', server=res_server,
                                             status=res_status, ip=res_ip)
        except Exception as e:
            Except_Log(stat=29, url=url + '|网络资产表错误', error=str(e))
        try:
            # Add the URL to the index table and the display table
            # (a second status-code re-check was present here but is disabled)
            test_url1 = list(URL.objects.filter(url=url))
            # Skip if another worker indexed the URL in the meantime
            if test_url1 == []:
                URL.objects.create(url=url, ip=ip)  # add to the URL index
                # Store the page content for the data display view
                try:
                    try:
                        Show_contents = pymysql.escape_string(
                            Get_Url_Info(url).Requests()[0])
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url, ip=ip, cs=Show_cs,
                                                 content=Cont)
                    except Exception as e:
                        Except_Log(stat=4, url=url + '|外键添加错误', error=str(e))
                        Show_contents = 'Error'
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url, ip=ip, cs=Show_cs,
                                                 content=Cont)
                except Exception as e:
                    Except_Log(stat=8, url=url + '|外键添加错误', error=str(e))
                # Find the monitored root domain this subdomain belongs to
                This_Sub = [x for x in ALL_DOMAINS if x in url]
                try:
                    # Refresh the per-domain URL count
                    if This_Sub != []:
                        Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                        counts = Other_Url.objects.filter(
                            url__contains=This_Sub[0])
                        Domain_Count.counts = str(len(counts))
                        Domain_Count.save()
                except Exception as e:
                    Except_Log(stat=15,
                               url=url + '|获取归属域名失败|' + str(This_Sub),
                               error=str(e))
        except Exception as e:
            Except_Log(stat=22, url=url + '|添加到网址索引表失败|', error=str(e))
        # Maintain IP uniqueness: stop if the IP is already recorded and was
        # already scanned ('是') or is being scanned ('中')
        test_ip = list(IP.objects.filter(ip=ip))
        if test_ip != []:
            test_ip_0 = IP.objects.filter(ip=ip)[0]
            if test_ip_0.get == '是' or test_ip_0.get == '中':
                return
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                cs_name = IP_Res.get_cs_name(ip)
                try:
                    # Insert a stub row now; the IP-info worker thread fills in
                    # details and runs the C-segment port scan asynchronously,
                    # keeping this synchronous path fast
                    IP.objects.create(ip=ip, servers='None', host_type='None',
                                      cs=cs_name, alive_urls='None', area=area)
                except Exception as e:
                    Except_Log(stat=86, url=url + '|转换IP地区编码失败|', error=str(e))
                    IP.objects.create(ip=ip, servers='None', host_type='None',
                                      cs=cs_name, alive_urls='None',
                                      area='获取失败')
            except Exception as e:
                Except_Log(stat=21, url=url + '|添加IP资源失败|', error=str(e))
    except Exception as e:
        Except_Log(stat=30, url=url + '|维护传入网址失败|', error=str(e))
def Add_Data_To_Url(url):
    '''
    2019-12-10
    1. Filter the incoming URL against the IP blacklist.
    2. Persist the URL: web-asset record, URL-index record, host-asset
       record, and the monitored-domain counts.
    2020-01-14
    1. Added wildcard-DNS (catch-all resolution) filtering rules.
    '''
    time.sleep(random.randint(5, 20))
    time.sleep(random.randint(5, 20))
    close_old_connections()
    urlhasdomain = check_black(url, ALL_DOMAINS)
    if urlhasdomain == False:
        # URL is outside the monitored domains
        print('[+ Insert Url] 当前网址不在域名监控域名范围内 : {}'.format(url))
        try:
            close_old_connections()
            BLACKURL.objects.create(url=url, ip=get_host(url),
                                    title=RequestsTitle(url),
                                    resons='当前网址不在域名监控域名范围内')
            return
        except:
            close_old_connections()
            return
    if '.gov.cn' in url or '.edu.cn' in url:
        return
    urlinblackurl = check_black(url, black_url)
    if urlinblackurl == True:
        # URL matches the URL blacklist
        print('[+ URL Blacklist] 当前网址触发黑名单 : {}'.format(url))
        try:
            burl = ''
            for blacurl in black_url:
                if blacurl in url:
                    burl = blacurl
            close_old_connections()
            BLACKURL.objects.create(url=url, ip=get_host(url),
                                    title=RequestsTitle(url),
                                    resons='触发网址黑名单:{}'.format(burl))
        except Exception as e:
            pass
        return
    try:
        ip = get_host(url)
        if ip == '获取失败':  # IP lookup failed
            try:
                BLACKURL.objects.create(url=url, ip=get_host(url),
                                        title=RequestsTitle(url),
                                        resons='获取网址IP失败')
            except Exception as e:
                pass
            return
        if ip in black_ip:
            # IP blacklist triggered
            print('[+ IP Blacklist] 当前IP触发黑名单 : {} --> {}'.format(ip, url))
            try:
                BLACKURL.objects.create(url=url, ip=get_host(url),
                                        title=RequestsTitle(url),
                                        resons='触发IP黑名单:{}'.format(ip))
            except Exception as e:
                pass
            return
        # Retry the index lookup in case the DB connection has gone stale
        try:
            test_url = list(URL.objects.filter(url=url))
        except:
            try:
                test_url = list(URL.objects.filter(url=url))
            except:
                close_old_connections()
                test_url = list(URL.objects.filter(url=url))
        if test_url != []:
            # Already present in the URL index table
            return
        '''
        2020-01-14 wildcard-DNS check:
        1. Compare this URL against the root domain's recorded title, IP,
           and page content.
        2. A title comparison (roughly 80% accurate) is disabled here: some
           large sites swap in a captcha page under heavy polling, and it
           fails outright for sites such as Anjuke.
        3. If the page contents are highly similar, treat the URL as a
           wildcard resolution and discard it.
        4. Comparing IPs alone is not enough: wildcard records such as
           xxadasda.yy.com and aedqwawrqw668.sdada.yy.com are both wildcard
           resolutions, yet they can resolve to different IPs.
        '''
        infjx = [x for x in ALL_DOMAINS if x in url]
        if infjx == []:
            return
        infjx = infjx[0]
        inftitle, infip, infcontent = (DOMAINSINFOS[infjx]['title'],
                                       DOMAINSINFOS[infjx]['ip'],
                                       DOMAINSINFOS[infjx]['content'])
        DD = Get_Url_Info(url).get_info()
        comtitle, comip, comcontent = DD['title'], DD['ip'], DD['content']
        if infcontent != 'Error' and comcontent != 'Error':
            if Return_Content_Difflib(infcontent, comcontent) == True:
                try:
                    # Page body matches the wildcard page: filter it out
                    print('[+ URL Universal] 泛解析网址自动过滤 : {}'.format(url))
                    close_old_connections()
                    BLACKURL.objects.create(url=url, ip=get_host(url),
                                            title=RequestsTitle(url),
                                            resons='泛解析自动过滤')
                    return
                except:
                    return
        else:
            # Probe an obviously nonexistent host on the same domain; if its
            # page matches this URL's page, the domain resolves everything
            DD1 = Get_Url_Info(url.replace('://', '://yyyyyyyyy')).get_info()
            comtitle1, comip1, comcontent1 = (DD1['title'], DD1['ip'],
                                              DD1['content'])
            if Return_Content_Difflib(comcontent, comcontent1) == True:
                try:
                    print('[+ URL Universal] 泛解析网址自动过滤 : {}'.format(url))
                    close_old_connections()
                    BLACKURL.objects.create(url=url, ip=get_host(url),
                                            title=RequestsTitle(url),
                                            resons='泛解析自动过滤')
                    return
                except:
                    return
        print('[+ Insert Url] 入库网址 : {}'.format(url))  # inserting URL
        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            # If the web-asset table lacks this record, add it
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                try:
                    res_title = pymysql.escape_string(res.get('title'))
                except Exception as e:
                    res_title = 'Error'
                    Except_Log(stat=11, url=url + '|网页内容转码失败', error=str(e))
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                try:
                    Other_Url.objects.create(url=res_url, title=res_title,
                                             power=res_power, server=res_server,
                                             status=res_status, ip=res_ip)
                except Exception as e:
                    close_old_connections()
                    Except_Log(stat=17, url=url + '|标题等信息编码不符合', error=str(e))
                    Other_Url.objects.create(url=res_url, title='Error',
                                             power='Error', server=res_server,
                                             status=res_status, ip=res_ip)
        except Exception as e:
            Except_Log(stat=29, url=url + '|网络资产表错误', error=str(e))
        try:
            # Re-check and add the URL to the index table and display table
            test_url1 = list(URL.objects.filter(url=url))
            # Skip if another worker indexed the URL in the meantime
            if test_url1 == []:
                URL.objects.create(url=url, ip=ip)  # add to the URL index
                try:
                    try:
                        ZHRND = Get_Url_Info(url)
                        Sconten = ZHRND.get_info()['content']
                        if Sconten == 'Error':
                            # Fetching the page content failed
                            pass
                        else:
                            try:
                                blackconincon = check_black(Sconten, black_con)
                                if blackconincon == True:
                                    # Page content triggers the content blacklist
                                    burl = ''
                                    for blacurl in black_con:
                                        if blacurl in Sconten:
                                            burl = blacurl
                                    print('[+ Cont Blacklist] 当前网页内容触发黑名单 : {}'.format(url))
                                    try:
                                        close_old_connections()
                                        BLACKURL.objects.create(
                                            url=url, ip=get_host(url),
                                            title=RequestsTitle(url),
                                            resons='触发网页内容黑名单:{}'.format(burl))
                                    except Exception as e:
                                        pass
                                    return
                            except:
                                Sconten = '获取失败'
                        # Store the page content for the data display view
                        Show_contents = pymysql.escape_string(Sconten)
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url, ip=ip, cs=Show_cs,
                                                 content=Cont)
                    except Exception as e:
                        close_old_connections()
                        Except_Log(stat=4, url=url + '|外键添加错误', error=str(e))
                        Show_contents = 'Error'
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url, ip=ip, cs=Show_cs,
                                                 content=Cont)
                except Exception as e:
                    Except_Log(stat=8, url=url + '|外键添加错误', error=str(e))
                # Find the monitored root domain this subdomain belongs to
                This_Sub = [x for x in ALL_DOMAINS if x in url]
                try:
                    # Refresh the per-domain URL count
                    if This_Sub != []:
                        Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                        counts = Other_Url.objects.filter(
                            url__contains=This_Sub[0])
                        Domain_Count.counts = str(len(counts))
                        Domain_Count.save()
                except Exception as e:
                    Except_Log(stat=15,
                               url=url + '|获取归属域名失败|' + str(This_Sub),
                               error=str(e))
        except Exception as e:
            Except_Log(stat=22, url=url + '|添加到网址索引表失败|', error=str(e))
        try:
            test_ip = list(IP.objects.filter(ip=ip))
        except:
            close_old_connections()
            test_ip = list(IP.objects.filter(ip=ip))
        # Maintain IP uniqueness. (The original guard that skipped IPs already
        # scanned ('是') or being scanned ('中') is commented out here.)
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                cs_name = IP_Res.get_cs_name(ip)
                try:
                    # Insert a stub row now; the IP-info worker thread fills in
                    # details and runs the port scan asynchronously
                    IP.objects.create(ip=ip, servers='None', host_type='None',
                                      cs=cs_name, alive_urls='None', area=area)
                except Exception as e:
                    Except_Log(stat=86, url=url + '|转换IP地区编码失败|', error=str(e))
                    IP.objects.create(ip=ip, servers='None', host_type='None',
                                      cs=cs_name, alive_urls='None',
                                      area='获取失败')
            except Exception as e:
                Except_Log(stat=21, url=url + '|添加IP资源失败|', error=str(e))
    except Exception as e:
        Except_Log(stat=30, url=url + '|维护传入网址失败|', error=str(e))
def Add_Data_To_Url(url):
    '''
    2019-12-10
    1. Filter the incoming URL against the IP blacklist.
    2. Persist the URL: web-asset record, URL-index record, host-asset
       record, and the monitored-domain counts.
    '''
    time.sleep(random.randint(5, 20))
    time.sleep(random.randint(5, 20))
    time.sleep(random.randint(5, 20))
    close_old_connections()
    print('[+ Insert Url] 入库网址 : {}'.format(url))  # inserting URL
    if '.gov.cn' in url or '.edu.cn' in url:
        return
    urlinblackurl = check_black(url, black_url)
    if urlinblackurl == True:
        # URL matches the URL blacklist
        print('[+ URL Blacklist] 当前网址触发黑名单 : {}'.format(url))
        return
    try:
        ip = get_host(url)
        if ip == '获取失败':  # IP lookup failed
            return
        if ip in black_ip:
            # IP blacklist triggered
            print('[+ IP Blacklist] 当前IP触发黑名单 : {} --> {}'.format(ip, url))
            return
        # Retry the index lookup in case the DB connection has gone stale
        try:
            test_url = list(URL.objects.filter(url=url))
        except:
            try:
                test_url = list(URL.objects.filter(url=url))
            except:
                close_old_connections()
                test_url = list(URL.objects.filter(url=url))
        if test_url != []:
            # Already present in the URL index table
            return
        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            # If the web-asset table lacks this record, add it
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                try:
                    res_title = pymysql.escape_string(res.get('title'))
                except Exception as e:
                    res_title = 'Error'
                    Except_Log(stat=11, url=url + '|网页内容转码失败', error=str(e))
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                try:
                    Other_Url.objects.create(url=res_url, title=res_title,
                                             power=res_power, server=res_server,
                                             status=res_status, ip=res_ip)
                except Exception as e:
                    close_old_connections()
                    Except_Log(stat=17, url=url + '|标题等信息编码不符合', error=str(e))
                    Other_Url.objects.create(url=res_url, title='Error',
                                             power='Error', server=res_server,
                                             status=res_status, ip=res_ip)
        except Exception as e:
            Except_Log(stat=29, url=url + '|网络资产表错误', error=str(e))
        try:
            # Re-check and add the URL to the index table and display table
            test_url1 = list(URL.objects.filter(url=url))
            # Skip if another worker indexed the URL in the meantime
            if test_url1 == []:
                URL.objects.create(url=url, ip=ip)  # add to the URL index
                try:
                    try:
                        ZHRND = Get_Url_Info(url)
                        Sconten = ZHRND.get_info()['content']
                        if Sconten == 'Error':
                            # Fetching the page content failed
                            pass
                        else:
                            try:
                                blackconincon = check_black(Sconten, black_con)
                                if blackconincon == True:
                                    # Page content triggers the content blacklist
                                    print('[+ Cont Blacklist] 当前网页内容触发黑名单 : {}'.format(url))
                                    return None
                            except:
                                Sconten = '获取失败'
                        # Store the page content for the data display view
                        Show_contents = pymysql.escape_string(Sconten)
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url, ip=ip, cs=Show_cs,
                                                 content=Cont)
                    except Exception as e:
                        close_old_connections()
                        Except_Log(stat=4, url=url + '|外键添加错误', error=str(e))
                        Show_contents = 'Error'
                        Cont = Content()
                        Cont.url = url
                        Cont.content = Show_contents
                        IP_Res = Get_Ip_Info(ip)
                        Show_cs = IP_Res.get_cs_name(ip)
                        Cont.save()
                        Show_Data.objects.create(url=url, ip=ip, cs=Show_cs,
                                                 content=Cont)
                except Exception as e:
                    Except_Log(stat=8, url=url + '|外键添加错误', error=str(e))
                # Find the monitored root domain this subdomain belongs to
                This_Sub = [x for x in ALL_DOMAINS if x in url]
                try:
                    # Refresh the per-domain URL count
                    if This_Sub != []:
                        Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                        counts = Other_Url.objects.filter(
                            url__contains=This_Sub[0])
                        Domain_Count.counts = str(len(counts))
                        Domain_Count.save()
                except Exception as e:
                    Except_Log(stat=15,
                               url=url + '|获取归属域名失败|' + str(This_Sub),
                               error=str(e))
        except Exception as e:
            Except_Log(stat=22, url=url + '|添加到网址索引表失败|', error=str(e))
        try:
            test_ip = list(IP.objects.filter(ip=ip))
        except:
            close_old_connections()
            test_ip = list(IP.objects.filter(ip=ip))
        # Maintain IP uniqueness. (The original guard that skipped IPs already
        # scanned ('是') or being scanned ('中') is commented out here.)
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                cs_name = IP_Res.get_cs_name(ip)
                try:
                    # Insert a stub row now; the IP-info worker thread fills in
                    # details and runs the port scan asynchronously
                    IP.objects.create(ip=ip, servers='None', host_type='None',
                                      cs=cs_name, alive_urls='None', area=area)
                except Exception as e:
                    Except_Log(stat=86, url=url + '|转换IP地区编码失败|', error=str(e))
                    IP.objects.create(ip=ip, servers='None', host_type='None',
                                      cs=cs_name, alive_urls='None',
                                      area='获取失败')
            except Exception as e:
                Except_Log(stat=21, url=url + '|添加IP资源失败|', error=str(e))
    except Exception as e:
        Except_Log(stat=30, url=url + '|维护传入网址失败|', error=str(e))
        # Retry the whole insert after a failure
        # (beware: this recursion is unbounded if the error persists)
        Add_Data_To_Url(url)
def Add_Data_To_Url(url):
    try:
        ip = get_host(url)
        if ip == '获取失败':  # get_host returns '获取失败' ("lookup failed") on error
            return
        test_url = list(URL.objects.filter(url=url))
        # If the URL is already in the index table, exit immediately
        if test_url != []:
            return
        try:
            Test_Other_Url = Other_Url.objects.filter(url=url)
            # If the web-asset table lacks this record, add it
            if list(Test_Other_Url) == []:
                res = Get_Url_Info(url).get_info()
                res_url = res.get('url')
                res_title = res.get('title')
                res_power = res.get('power')
                res_server = res.get('server')
                res_status = res.get('status')
                res_ip = ip
                # Only store URLs whose status code is in the allow-list
                if int(res_status) in Alive_Status:
                    Other_Url.objects.create(url=res_url, title=res_title,
                                             power=res_power,
                                             server=res_server,
                                             status=res_status, ip=res_ip)
        except Exception as e:
            # '错误代码' means "error code"
            print('错误代码 [29] {}'.format(str(e)))
            Error_Log.objects.create(url=url,
                                     error='错误代码 [29] {}'.format(str(e)))
        try:
            # Fetch the status again to keep data consistent before indexing
            res = Get_Url_Info(url).get_info()
            res_status = res.get('status')
            if int(res_status) not in Alive_Status:
                return
            URL.objects.create(url=url, ip=ip)  # add to the URL index
            try:
                # Store the page content for the data display view
                Show_contents = Get_Url_Info(url).Requests()[0]
                Cont = Content()
                Cont.url = url
                Cont.content = Show_contents
                Cont.save()
                Show_Data.objects.create(url=url, ip=ip, content=Cont)
            except Exception as e:
                print('错误代码 [08] {}'.format(str(e)))
                Error_Log.objects.create(url='外键添加错误',
                                         error='错误代码 [08] {}'.format(str(e)))
            BA = Domains.objects.all()
            ALL_DOMAINS = [x.get('url') for x in BA.values()]  # all monitored domains
            # Find the monitored root domain this subdomain belongs to
            This_Sub = [x for x in ALL_DOMAINS if x in url]
            try:
                # Refresh the per-domain URL count
                if This_Sub != []:
                    Domain_Count = Domains.objects.filter(url=This_Sub[0])[0]
                    counts = Other_Url.objects.filter(
                        url__contains=This_Sub[0])
                    Domain_Count.counts = str(len(counts))
                    Domain_Count.save()
            except Exception as e:
                print('错误代码 [15] {}'.format(str(e)))
                Error_Log.objects.create(url=url + '|' + str(This_Sub),
                                         error='错误代码 [15] {}'.format(str(e)))
        except Exception as e:
            print('错误代码 [22] {}'.format(str(e)))
            Error_Log.objects.create(url=url,
                                     error='错误代码 [22] {}'.format(str(e)))
        # Maintain IP uniqueness: stop if the IP is already recorded and was
        # already scanned ('是') or is being scanned ('中')
        test_ip = list(IP.objects.filter(ip=ip))
        if test_ip != []:
            test_ip_0 = IP.objects.filter(ip=ip)[0]
            if test_ip_0.get == '是' or test_ip_0.get == '中':
                return
        if test_ip == []:
            try:
                IP_Res = Get_Ip_Info(ip)
                area = IP_Res.get_ip_address(ip)
                # Insert a stub row now; an async worker fills in the details
                IP.objects.create(ip=ip, servers='None', host_type='None',
                                  alive_urls='None', area=area)
            except Exception as e:
                print('错误代码 [21] {}'.format(str(e)))
                Error_Log.objects.create(url=url,
                                         error='错误代码 [21] {}'.format(str(e)))
    except Exception as e:
        print('错误代码 [30] {}'.format(str(e)))
        Error_Log.objects.create(url=url,
                                 error='错误代码 [30] {}'.format(str(e)))
# In progress: importing all our slide content into the database,
# stripped down to markdown.
import os
import sys

base = os.path.dirname(os.path.dirname(__file__))
base_parent = os.path.dirname(base)
sys.path.append(base)
sys.path.append(base_parent)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'phaidra.settings')

import django
django.setup()

import html2text

from app.models import Content, Grammar, Language

indir = '/opt/phaidra/static/content/en/slides/'
en_lang = Language.objects.get(name='English')
gr_lang = Language.objects.get(name='Ancient Greek')
ref = Grammar.objects.get(ref='s1')

for root, dirs, filenames in os.walk(indir):
    for filename in filenames:
        with open(os.path.join(root, filename), 'r') as f:
            text = f.read().decode('utf8')  # Python 2: bytes -> unicode
            # Strip the HTML slide down to markdown-style plain text
            content = html2text.html2text(text)
            c = Content(title='UPDATE ME!', content=content, grammar_ref=ref,
                        source_lang=en_lang, target_lang=gr_lang)
            c.save()