def run(timer):
    result = process.monitor()
    datastore.sync(result, expressions.parse("AdType == \"Sysmon.Process\""))
    logger.info("Stored %s process samples" % len(result))
def createResultString(code, message):
    result = {}
    result['code'] = code
    result['message'] = message
    result_string = json.dumps(result, indent=4, ensure_ascii=False)
    logger.info(result_string)
    return result_string
def get_next_page(org_page, counter, org_href, proxy=None):
    try:
        after_author = org_page.select_one(".gs_btnPR").attrs["onclick"].split(
            "\\")[-3][3:]
        org_href_2 = "&".join(org_href.split("&")[:-2])
    except KeyError:
        return None
    global HEADERS
    global BASE_URL
    if proxy is not None:
        proxies = {f'{proxy.split(":")[0].strip()}': proxy}
    else:
        proxies = None
    response = requests.get(
        f'{BASE_URL}{org_href_2}&after_author={after_author}'
        f'&astart={counter}',
        headers=HEADERS, proxies=proxies)
    logger.info(f'{response.status_code} {response.reason}')
    tree = Bs(response.content, "lxml")
    if tree.select_one(".gsc_pgn_ppn").text == "1-10":
        return None
    return tree
def datalayer_1(request):
    logger.info('datalayer_1 - METHOD STARTED with parameters: request:%s' % request)
    return_data = {
        'service': 'dataLayer',
        'version': 1
    }
    if 'text' not in request.form:
        logger.error('datalayer_1 - ERROR post variable \'text\' not supplied')
        return ERROR_DATALAYER_NOTEXT
    text = request.form['text']
    return_data = {
        'service': 'datalayer',
        'version': 1,
        'status': 'success'
    }
    return_data['datalayer'] = {
        'text': text,
        'tags': run_text_tagging(text),
        'locations': run_text_locations(text),
        'sentiment': run_text_sentiment(text)
    }
    logger.info('datalayer_1 - METHOD ENDED')
    return return_data
def get_organization_page(domain, proxy=None):
    global HEADERS
    global BASE_URL
    if proxy is not None:
        proxies = {f'{proxy.split(":")[0].strip()}': proxy}
    else:
        proxies = None
    response = requests.get(f"{BASE_URL}/scholar?q={domain}",
                            headers=HEADERS, proxies=proxies)
    logger.info(f'{response.status_code} {response.reason}')
    tree = Bs(response.content, "lxml")
    org_href = get_org_href(tree)
    if not org_href:
        return None, None
    response = requests.get(f"{BASE_URL}{org_href}",
                            headers=HEADERS, proxies=proxies)
    if not response.ok:
        return None, None
    tree = Bs(response.content, "lxml")
    return tree, org_href
def view_record(page=1, readonly=True):
    form = Form_IP_Add(request.form)
    list_opers = IpTable().get_all_oper()
    if request.method == 'POST':
        if form.validate():
            old_record = IpTable()
            ipAdress = form.ipAdress.data
            hostName = form.hostName.data
            Owner = form.Owner.data
            opersys = request.form['oper-sys']
            createDate = form.dateStart.data
            service = request.form['inputService']
            old_record.edit(page, ipAdress, hostName, Owner, opersys, createDate)
            logger.info("Editing a record.")
            writeLog(request.remote_addr + ' - ' + session['user_name'] + ' - ' +
                     'edit ' + str(page) + ' with date ' + str(request.form))
            flash('IP ' + ipAdress + " edited successfully.", category="success")
    m_tasks = IpTable()
    record = m_tasks.get_id(page)
    return render_template("ip_view.html", form=form, record=record,
                           readonly=readonly, list_opers=list_opers)
def parse(contents):
    '''
    Parse the output of the 'ps' command and return a list of records
    '''
    lineNo = 0
    processes = []
    for line in contents.split("\n"):
        line = line.strip()
        lineNo = lineNo + 1
        if lineNo == 1 or len(line) == 0:
            continue
        parts = line.split(None, len(properties) - 1)
        process = Ad("Sysmon.Process")
        for n in range(len(parts)):
            part = parts[n]
            property = properties[n]
            attribute = propertiesMap[property]
            value = part
            if property in parseMap:
                value = parseMap[property](part)
            process.setObject(attribute, value)
        processes.append(process)
    logger.info("Parsed %d processes" % len(processes))
    return processes
def before_first_request():
    logger.info(url_for('index'))
    global loaded_excel
    loaded_excel = False
    session.permanent = True
    app.permanent_session_lifetime = timedelta(minutes=5)
    logger.info(
        "-------------------- initializing everything ---------------------")
    db.create_all()
def monitor():
    '''
    Run a 'ps' command and return Sysmon.Process records
    '''
    cmd = ["/bin/ps", "axww", "-o", ",".join(properties)]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    logger.info("Running ps command: %s" % " ".join(cmd))
    return parse(p.communicate()[0])
def t_get_author(self, author, org_domain):
    scraped_author = get_author(author["name"])
    if scraped_author is not None:
        author = scraped_author
    author["organizations"] = [org_domain]
    result = insert_one("author", author)
    logger.info(f'<{author["name"]}> is inserted! | {result}')
def index():
    new_tasks = Tasks()
    new_tasks.add_data("Tom Hanks", "Clean the house please",
                       request.remote_addr, request.headers.get('User-Agent'))
    list_records = Tasks().list_all_tasks()
    for record in list_records:
        logger.info(record.author + " " + record.task + " " +
                    record.user_ip + " " + record.user_agent)
    return "see the console"
def testing_get():
    if request.method == "POST":
        # this will never get triggered because you didn't specify the POST method in the @app.route
        return "never going to show"
    if "search" not in request.args:
        logger.error("you forgot to pass the search args")
        return "failed."
    search = request.args.get("search")
    logger.info("the search variable is = " + search)
    return "you made a GET request with a parameter search = " + search
def add_record():
    form = Form_New_Message(request.form)
    if request.method == 'POST':
        if form.validate():
            logger.info("Processing new message.")
            logger.info(form.category.data + " " + form.title.data + " " +
                        form.message.data + " " + form.author.data)
            flash("Your message was sent!", category="success")
    return render_template("add_record.html", form=form)
def testing_get():
    if request.method == 'POST':
        # this will never get triggered because you didn't specify the POST method in the @app.route
        return "never going to show"
    if 'search' not in request.args:
        logger.error("you forgot to pass the search args")
        return "failed."
    search = request.args.get("search")
    logger.info("the search variable is = " + search)
    return "you made a GET request with a parameter search = " + search
def t_find_pdf_primarily(self, pub_id: str, title: str, authors: list, url: str):
    resd = {"status": "ok"}
    if url:
        files_path = get_config("FILES_PATH")
        file_name = md5(url.encode("utf-8")).hexdigest()
        if not os.path.exists(files_path):
            os.makedirs(files_path)
        pdf_raw = download(url)
        full_path = f'{files_path}{os.path.sep}{file_name}.pdf'
        with open(full_path, "wb+") as f:
            f.write(pdf_raw)
        resd["path"] = full_path
        try:
            content = extract_text_from_pdf(full_path)
        except Exception as e:
            resd["extraction_failure"] = str(e)
            logger.debug(e)
            content = None
        update_result = update_one("publication", {
            "filter": {"id": {"$eq": pub_id}},
            "update": {
                "$set": {
                    "raw_base64": base64.encodebytes(pdf_raw).decode("utf-8"),
                    "content": content
                }
            },
            "upsert": True
        })
        logger.info(f'Update Result: {update_result}')
        t_elasticsearch_indexing.apply_async((pub_id,))
    else:
        authors = find("author", {
            "filter": {"id": {"$in": authors}},
            "projection": {"name": 1}
        })
        t_find_pdf_secondarily.apply_async(
            (pub_id, title, [a["name"] for a in authors])
        )
    return resd
def search(index: str, text: str):
    query = preprocess_text(text.strip().lower())
    vector = get_vector(text)["vector"]
    langs = get_config("LANGUAGES")
    search_fields = list()
    for lang in langs:
        search_fields += [f'title_{lang}', f'content_{lang}']
    query_json = {
        "_source": ["url", "authors", "citedby", "year", "lang"] +
                   [f'title_{l}' for l in langs],
        "query": {
            "script_score": {
                "query": {
                    "bool": {
                        "should": [{"match": {f: query}} for f in search_fields]
                    }
                },
                "script": {
                    "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
                    "params": {"query_vector": vector}
                }
            }
        },
        "highlight": {
            "fragment_size": 100,
            "fields": {f: {} for f in search_fields}
        },
        "size": 100
    }
    url = get_config("ELASTICSEARCH") + f'/{index}/_search'
    response = rq.get(url, json=query_json).json()
    logger.info(f'Resp: {response}')
    return response.get("hits", {}).get("hits", [])
def add_record():
    form = Form_Record_Add(request.form)
    if request.method == 'POST':
        if form.validate():
            new_record = SampleTable()
            title = form.title.data
            description = form.description.data
            new_record.add_data(title, description)
            logger.info("Adding a new record.")
            flash("Record added successfully.", category="success")
    return render_template("add_record.html", form=form)
def histogram_1(request):
    logger.info('histogram_1 - METHOD STARTED with parameters: request:%s' % request)
    image = request.files['image']
    return_data = {
        'service': 'histogram',
        'version': 1,
        'status': 'success'
    }
    return_data['imglayer'] = run_img_imaging(image)
    logger.info('histogram_1 - METHOD ENDED')
    return return_data
def facedetection_1(request):
    logger.info('facedetection_1 - METHOD STARTED with parameters: request:%s' % request)
    image = request.files['image']
    return_data = {
        'service': 'facedetection',
        'version': 1,
        'status': 'success'
    }
    return_data['objectdetection'] = run_img_face(image)
    logger.info('facedetection_1 - METHOD ENDED')
    return return_data
def login():
    if (check_login()[0]):
        return index()
    m_users = UserTable()
    form = Form_Login(request.form)
    if request.method == 'POST':
        if form.validate():
            import hashlib
            m = hashlib.md5()
            uName = form.uName.data
            pWord = form.pWord.data
            if ('ncb-bank' in uName):
                import re
                research = re.search(r"(.*)@ncb-bank", uName)
                domainName = research.group(1)
                accDomain = domainName + '@ncb-bank'
                logger.info(accDomain)
                rs = m_users.get_user(domainName, 'HelloFromDomain')
                try:
                    conn = Connection('10.1.33.18', accDomain,
                                      password=pWord, auto_bind=True)
                    if rs['ok']:
                        session['logged_in'] = True
                        session['user_name'] = accDomain + '.vn'
                        session['role'] = rs['role']
                        logger.info("logged " + session['user_name'] + ".")
                        writeLog(request.remote_addr + ' - ' + uName + ' - ' + 'logged')
                        return redirect(url_for('index'))
                    else:
                        writeLog(request.remote_addr + ' - ' + uName + ' - ' + 'permission failed')
                        flash('You do NOT have any permission to be here!', 'danger')
                except Exception:
                    writeLog(request.remote_addr + ' - ' + uName + ' - ' + 'login failed')
                    flash("Wrong password or username", 'danger')
            else:
                # hashlib.md5 expects bytes, so encode the password before hashing
                m.update(pWord.encode('utf-8'))
                rs = m_users.get_user(uName, str(m.hexdigest()))
                if rs['ok']:
                    session['logged_in'] = True
                    session['user_name'] = rs['name']
                    session['role'] = rs['role']
                    logger.info("logged " + session['user_name'] + ".")
                    writeLog(request.remote_addr + ' - ' + uName + ' - ' + 'logged')
                    return redirect(url_for('index'))
                else:
                    writeLog(request.remote_addr + ' - ' + uName + ' - ' + 'login failed')
                    flash("Wrong password or username", 'danger')
    return render_template("login.html", form=form)
def create_lead():
    logger.info(request)
    logger.info(request.form)
    lead_form = BaseApiLeadForm(request.form)
    if lead_form.validate():
        lead_id = add_lead(request)
        logger.info("lead generated " + str(lead_id))
        session['customer_id'] = int(lead_id)
        message = {'result': "success"}
    else:
        logger.info(lead_form.errors)
        message = {'result': 'failure'}
    return jsonify(message)
def update_vector(index, _id, vector, rcoef, relevance):
    logger.info(f'{type(relevance)}: {relevance}')
    sign = "+" if str(relevance).strip().lower() == "true" else "-"
    inline = ("for (int i=0; i<ctx._source.vector.length; ++i)"
              "{ctx._source.vector[i]=(ctx._source.vector[i]" + sign +
              "(params.vector[i]*params.rcoef))/2}")
    q = {
        "script": {
            "lang": "painless",
            "params": {
                "vector": list(vector),
                "rcoef": rcoef
            },
            "inline": inline
        }
    }
    response = rq.post(get_config("ELASTICSEARCH") + f'/{index}/_update/{_id}',
                       json=q).json()
    logger.info(response)
    return response
def novel_recormmend():
    logger.info("Fetching recommendation data from Biquge...")
    result = {}
    spider = BIQUGE()
    logger.info("Preparing the spider and starting to crawl...")
    result = spider.getRecormmend()
    logger.info("Finished fetching recommendation data...")
    return result
def novel_content():
    result = {}
    logger.info("Start fetching novel chapter content...")
    logger.info("Request data: " + request.data.decode("utf-8"))
    request_data = json.loads(request.data.decode('utf-8'))
    novel_content_url = request_data['url']
    if novel_content_url is None or len(novel_content_url) == 0:
        logger.info("No chapter content URL found in the request")
        result['code'] = "1001"
        result['msg'] = "No chapter content URL found in the request"
        # Return the error response instead of crawling with an empty URL
        return json.dumps(result, ensure_ascii=False, indent=4)
    spider = BIQUGE()
    logger.info("Preparing the spider and starting to crawl...")
    spider_result = spider.getNovelContent(novel_content_url)
    return spider_result
def novel_catalog():
    result = {}
    logger.info("Fetching the chapter catalog from Biquge")
    logger.info("Request data: " + request.data.decode('utf-8'))
    request_data = json.loads(request.data.decode("utf-8"))
    novel_url = request_data['novel_url']
    if novel_url is None or len(novel_url) == 0:
        logger.info("No novel URL found in the request")
        result['code'] = '1001'
        result['msg'] = 'No novel URL found in the request'
        return json.dumps(result, ensure_ascii=False, indent=4)
    spider = BIQUGE()
    logger.info("Preparing the spider and starting to crawl...")
    spider_result = spider.getNovelBrief(novel_url)
    return spider_result
def add_lead(request):
    logger.info("Entering add lead function")
    email_address = request.form['email']
    fname = request.form['first_name']
    lname = request.form['last_name']
    billing_address = request.form['billing_address']
    city = request.form['city']
    state = request.form['state']
    country = request.form['country']
    zip_code = request.form['zip_code']
    logger.info("Entering requests part")
    customer = Customer(fname=fname, lname=lname, billing_address=billing_address,
                        city=city, state=state, country=country, zip_code=zip_code)
    logger.info(customer.id)
    db.session.add(customer)
    db.session.commit()
    return customer.id
def index():
    logger.info("hello")
    return "hello, world"
def before_first_request():
    logger.info(
        "-------------------- initializing everything ---------------------")
    db.create_all()
def before_first_request():
    logger.info("-------------------- initializing everything ---------------------")
    db.create_all()
def t_find_pdf_secondarily(self, pub_id: str, title: str, authors: list):
    resd = {"status": "ok"}
    try:
        # Check every author one by one
        for single_author in authors:
            # Request the search results for this author
            http = urllib3.PoolManager()
            response = http.request(
                'GET', 'https://libgen.is/scimag/?q=' + single_author)
            html_text = response.data
            soup = BeautifulSoup(html_text, 'html.parser')
            # Check whether the search returned any data
            try:
                total_value = str(
                    soup.find('div', attrs={
                        'style': 'float:left'
                    }).getText()).split(" ")[0]
            except Exception:
                total_value = 0
            # If the search returned no data, continue with the next author
            if total_value == 0:
                continue
            # Compute the number of result pages; because of the remainder in the
            # division, add one page when needed so the last page is not skipped
            total_page_double = int(total_value) / 25
            total_page = int(int(total_value) / 25)
            if total_page != total_page_double:
                total_page += 1
            # Walk through the result pages of this author. The first page was
            # already requested above (we only get here when the result count is
            # non-zero), so no request is made for it again. If nothing is found
            # on the current page and there is more than one page, a request is
            # made at the end of the loop and the next iteration searches the new page.
            for i in range(total_page):
                counter = 0
                for row in soup.find_all('tr'):
                    if counter == 0:
                        # Skip the initial row; it contains the table header of the page
                        counter += 1
                        continue
                    row_item = row.find_all('td')
                    row_title = row_item[1].find_all('a')[0].text
                    # Check how similar the row title is to the given title
                    ratio = fuzz.ratio(row_title.lower(), title.lower())
                    if ratio > 75:
                        url_for_get = row_item[4].find_all('li')
                        href = url_for_get[1].find_all('a', href=True)[0]['href']
                        response_for_pdf = http.request('GET', href)
                        pdf_page = BeautifulSoup(response_for_pdf.data, 'html.parser')
                        pdf_url = pdf_page.find_all(
                            'td', {'align': 'center'})[0].find_all(
                                'a', href=True)[0]['href']
                        pdf_raw = download(pdf_url)
                        files_path = get_config("FILES_PATH")
                        if not os.path.exists(files_path):
                            os.makedirs(files_path)
                        file_name = md5(pdf_url.encode("utf-8")).hexdigest()
                        full_path = f'{files_path}{os.path.sep}{file_name}.pdf'
                        with open(full_path, "wb+") as f:
                            f.write(pdf_raw)
                        resd["path"] = full_path
                        try:
                            content = extract_text_from_pdf(full_path)
                        except Exception as e:
                            resd["extraction_failure"] = str(e)
                            logger.debug(e)
                            content = None
                        update_one("publication", {
                            "filter": {"id": {"$eq": pub_id}},
                            "update": {
                                "$set": {
                                    "raw_base64": base64.encodebytes(pdf_raw).decode("utf-8"),
                                    "content": content
                                }
                            },
                            "upsert": True
                        })
                        if content:
                            logger.info('Content is added to publication.')
                        t_elasticsearch_indexing.apply_async((pub_id, ))
                        return resd
                if total_page > 1:
                    response = http.request(
                        'GET', 'https://libgen.is/scimag/?q=' + single_author +
                        '&page=' + str(i + 2))
                    html_text = response.data
                    soup = BeautifulSoup(html_text, 'html.parser')
    except Exception as e:
        logger.exception(e)
    t_elasticsearch_indexing.apply_async((pub_id, ))
    return resd
from elasticsearch import NotFoundError
from application import celery, logger
def t_elasticsearch_indexing(self, pub_id: str):
    resd = {"status": "ok"}
    pub = find_one("publication", {
        "filter": {"id": {"$eq": pub_id}, "vector": {"$exists": True}}
    })
    pub["authors"] = find("author", {
        "filter": {"id": {"$in": pub.get("authors", [])}},
        "projection": ["id", "name", "affiliation", "citedby",
                       "interests", "organizations"]
    })
    pub.pop("created_at", None)
    pub.pop("raw_base64", None)
    pub.pop("title_md5", None)
    pub.pop("_id", None)
    pub_id = pub.pop("id")
    vector_field_tokens = list()
    if pub.get("content", None):
        vector_field_tokens += pub["content"].split()
    if not pub["title"].startswith("unk_"):
        vector_field_tokens += pub["title"].split()
    vector_field = " ".join(vector_field_tokens)
    vectorizer_response = get_vector(preprocess_text(vector_field))
    pub["lang"] = vectorizer_response["lang"]
    pub["vector"] = vectorizer_response["vector"]
    langs = get_config("LANGUAGES")
    for lang in langs:
        if lang != pub["lang"]:
            pub[f'title_{lang}'] = preprocess_text(
                translate(pub["title"], lang) or ""
            )
            if str(pub.get("content", None)).strip().lower() not in ["none", ""]:
                pub[f'content_{lang}'] = preprocess_text(
                    translate(pub["content"], lang) or ""
                )
        else:
            pub[f'title_{lang}'] = preprocess_text(pub["title"])
            pub[f'content_{lang}'] = preprocess_text(pub.get("content", "") or "")
    if "title" in pub:
        del pub["title"]
    if "content" in pub:
        del pub["content"]
    update_one("publication", {
        "filter": {"id": {"$eq": pub_id}},
        "update": {"$set": {"vector": pub["vector"], "lang": pub["lang"]}}
    })
    for lang in langs:
        publication_mappings["properties"][f'title_{lang}'] = {"type": "text"}
        publication_mappings["properties"][f'content_{lang}'] = {"type": "text"}
    resp = rq.put(
        get_config("ELASTICSEARCH") + "/publication",
        json={"mappings": publication_mappings}
    )
    if resp.status_code == 400:
        resp = rq.put(
            get_config("ELASTICSEARCH") + "/publication/_mappings",
            json=publication_mappings
        )
    logger.info(f'Mapping Response: {resp.json()}')
    # resp = es.indices.create(
    #     index="publication",
    #     body={"mappings": publication_mappings},
    #     ignore=400
    # )
    result = es.index(index="publication", body=pub, id=pub_id)
    resd["result"] = result
    return resd
import json
def before_first_request():
    logger.info("initializing the database >>>>>>>")
    db.create_all()
import base64
def t_scrape_publications_of_author(self, author_id, author_name):
    resd = {"status": "ok"}
    author_info = next(search_author(author_name)).fill()
    updates = list()
    counter = 1
    for publication in author_info.publications:
        pub_id = publication.id_citations.split(":")[1].strip()
        title = publication.bib.get("title", f'unk_{counter}')
        if not title.startswith("unk_"):
            title = preprocess_text(title)
        if title.strip() == "":
            continue
        pub_in_mongo = find_one("publication", {
            "filter": {"id": {"$eq": pub_id}}
        })
        if not pub_in_mongo:
            publication = publication.fill().__dict__
            publication["id"] = pub_id
            publication["title_md5"] = md5(title.encode("utf-8")).hexdigest()
            publication["created_at"] = datetime.datetime.now().isoformat()
            publication["authors"] = [author_id]
            publication.pop("id_citations", None)
            publication.pop("_filled", None)
            publication.pop("source", None)
            publication = {
                **publication,
                **publication.pop("bib", dict()),
                "title": title
            }
            insert_result = insert_one("publication", publication)
            logger.info(f'<{publication["title"]}> | {insert_result}')
        if pub_in_mongo:
            if pub_in_mongo.get("title", None) is None:
                publication = publication.fill().__dict__
                publication["title_md5"] = md5(title.encode("utf-8")).hexdigest()
                publication["created_at"] = datetime.datetime.now().isoformat()
                publication["authors"] = [author_id] + pub_in_mongo.get(
                    "authors", list())
                publication.pop("id_citations", None)
                publication.pop("_filled", None)
                publication.pop("source", None)
                publication = {
                    **publication,
                    **publication.pop("bib", dict()),
                    "title": title
                }
                update_result = update_one("publication", {
                    "filter": {"id": {"$eq": pub_id}},
                    "update": {"$set": publication}
                })
                logger.info(f'<{publication["title"]}> | {update_result}')
            else:
                updates.append(pub_id)
                publication = pub_in_mongo
                publication["authors"] = list(
                    set(publication.get("authors", list()) + [author_id]))
                logger.info('Pub is updated!')
        update_one("author", {
            "filter": {"id": {"$eq": author_id}},
            "update": {"$addToSet": {"publications": pub_id}}
        })
        t_find_pdf_primarily.apply_async(
            (pub_id, title, publication["authors"],
             publication.get("eprint", None)))
        counter += 1
        sleep(int(random() * 2))
    update_one("publication", {
        "filter": {"id": {"$in": updates}},
        "update": {"$addToSet": {"authors": author_id}}
    })
    resd["num_publications"] = counter
    return resd
def novel_bookshelf():
    '''
    Bookshelf module: supports querying the bookshelf and adding to it.
    Distinguishing parameters:
        method: 'SELECT' -- query
                'INSERT' -- insert
        openid
        novelInfo
    '''
    result = {}
    db.connect()
    logger.info("Bookshelf module...")
    logger.info("Request data: " + request.data.decode("utf-8"))
    request_data = json.loads(request.data.decode('utf-8'))
    openid = request_data['openid']
    logger.info("openid: " + openid)
    if 0 == len(openid):
        db.close()
        return createResultString("1001", "No openid found in the request data")
    method = request_data['method']
    logger.info("method: " + method)
    if 0 == len(method):
        db.close()
        return createResultString("1001", "No method found in the request data")
    elif str.upper(method) not in ['SELECT', 'INSERT']:
        db.close()
        return createResultString("1001", "Invalid method field [%s]" % (method))
    novelInfo = request_data['novelInfo']
    logger.info("Novel info: " + str(novelInfo))
    if 0 == len(novelInfo):
        db.close()
        return createResultString("1001", "No novelInfo found in the request data")
    if 'novelUrl' not in novelInfo.keys():
        db.close()
        return createResultString("1001", "novelInfo does not contain novelUrl")
    logger.info(str.upper(method))
    logger.info("SELECT" == str.upper(method))
    if "SELECT" == str.upper(method):
        logger.info("Querying the user's bookshelf...")
        sql = "select count(*) from novel_bookshelf where openid = '%s' and novel_url = '%s'" % (
            openid, novelInfo['novelUrl'])
        logger.info("Bookshelf query SQL: " + sql)
        db_result = db.executeSql(sql)
        logger.info("Database query result: " + str(db_result))
        if 0 == db_result[0][0]:
            db.close()
            return createResultString(
                "1000", "User [%s] does not have [%s] on the bookshelf!" %
                (openid, novelInfo['novelUrl']))
        else:
            db.close()
            return createResultString(
                "0000", "User [%s] already has [%s] on the bookshelf" %
                (openid, novelInfo['novelUrl']))
    elif "INSERT" == str.upper(method):
        logger.info("Adding to the bookshelf...")
        # First check whether the novel is already on the bookshelf; insert only if it is not
        sql = "select count(*) from novel_bookshelf where openid = '%s' and novel_url = '%s'" % (
            openid, novelInfo['novelUrl'])
        logger.info("Bookshelf query SQL: " + str(sql))
        db_result = db.executeSql(sql)
        logger.info("Database query result: " + str(db_result))
        if 1 == db_result[0][0]:
            logger.info("The novel [%s] is already on the bookshelf" % (novelInfo['novelUrl']))
            db.close()
            return createResultString("1111", "The novel [%s] is already on the bookshelf" %
                                      (novelInfo['novelUrl']))
        else:
            sql = "INSERT INTO NOVEL_BOOKSHELF(openid, novel_name, novel_url, novel_author, novel_image, novel_brief) " \
                  "VALUES('%s', '%s', '%s', '%s', '%s', '%s')" % \
                  (pymysql.escape_string(openid),
                   pymysql.escape_string(novelInfo['novelName']),
                   pymysql.escape_string(novelInfo['novelUrl']),
                   pymysql.escape_string(novelInfo['novelAuthor']),
                   pymysql.escape_string(novelInfo['novelImage']),
                   pymysql.escape_string(novelInfo['novelBrief']))
            logger.info("Executing SQL statement:")
            logger.info(sql)
            db.executeSql(sql)
            db.close()
            return createResultString("0000", "Added to the bookshelf successfully.")