def recommendation():
    by = request.form.get("by")
    if USER_MODE:
        temp = mycol.find_one({"_id": USER_ID})
        db = DB()
        table = pd.DataFrame()
        if by == "0":  # recommend by author
            try:
                result = temp["author"]
                name = max(result, key=result.count)  # most-searched author
                table = db.search_article_by_author(name)
            except (TypeError, KeyError, ValueError):
                return {'status': 'fail'}
        else:  # recommend by field
            try:
                result = temp["field"]
                field = max(result, key=result.count)  # most-searched field
                table = db.search_by_field(field)
            except (TypeError, KeyError, ValueError):
                return {'status': 'fail'}
        if table.empty:
            return {'status': 'fail'}
        return {"status": "success", "data": table.to_html(classes='data')}
    return {"status": "fail"}
def delete():
    db = DB()
    delete_data = json.loads(request.form.get('data'))
    print(delete_data)  # log the incoming payload
    status = db.delete_article(delete_data)
    db.close()
    return jsonify({"code": status})
def rank_author():
    db = DB()
    author = db.show_rank_author()
    if len(author) < 3:
        return jsonify({"status": "fail"})
    result = {1: author[0], 2: author[1], 3: author[2]}  # top three authors
    return jsonify({"status": "success", "data": result})
def delete():
    value = request.form.get('id')
    db = DB()
    result = db.delete_article_by_id(value)
    if result is False:
        return jsonify({"status": "fail"})
    return jsonify({"status": "success"})
def query():
    db = DB()
    query_data = json.loads(request.form.get('data'))
    args = list(query_data.values())
    table = db.query_article(*args)
    db.close()
    if table.empty:
        return jsonify({"code": -1})
    return jsonify({"code": 0, "data": table.to_html(classes='data')})
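# NOTE: the route decorators are not shown in these handlers, so the endpoint
# path below is a hypothetical placeholder. This sketch only illustrates the
# calling convention query() expects: the arguments JSON-encoded in a form
# field named 'data' (their order must match db.query_article's signature).
import json
import requests

resp = requests.post("http://localhost:5000/query",  # hypothetical path
                     data={"data": json.dumps({"author": "Knuth"})})
print(resp.json())  # {"code": 0, "data": "<table...>"} on a hit, {"code": -1} otherwise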
def rank_journal():
    db = DB()
    journal = db.show_rank_journal()
    if len(journal) < 3:
        return jsonify({"status": "fail"})
    result = {1: journal[0], 2: journal[1], 3: journal[2]}  # top three journals
    return jsonify({"status": "success", "data": result})
def search():
    value = request.form.get('value')
    by = int(request.form.get('by'))
    db = DB()
    table = pd.DataFrame()
    if by == 0:
        table = db.search_article_by_id(value)
    elif by == 1:
        table = db.search_article_by_author(value)
        if USER_MODE and not table.empty:
            # record the author in the user's search history
            # (Collection.update was removed in PyMongo 4; update_one replaces it)
            mycol.update_one({"_id": USER_ID}, {"$push": {"author": value}})
    elif by == 2:
        table = db.search_article_by_university(value)
    elif by == 3:
        table = db.search_article_by_title(value)
    elif by == 4:
        table = db.search_article_by_year(value)
    elif by == 5:
        table = db.search_article_by_journal(value)
    else:
        table = db.search_by_field(value)
        if USER_MODE and not table.empty:
            # record the field in the user's search history
            mycol.update_one({"_id": USER_ID}, {"$push": {"field": value}})
    if table.empty:
        return {"status": "fail"}
    return jsonify({"status": "success", "data": table.to_html(classes='data')})
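# For reference: the $push updates above, together with the reads in
# recommendation(), imply a per-user MongoDB document shaped roughly like
#   {"_id": USER_ID, "author": [...authors searched...], "field": [...fields...]}
# (field names come from the code; values below are illustrative).
# A quick demonstration of the aggregation idiom recommendation() applies to it:
history = ["Knuth", "Knuth", "Lamport"]          # illustrative author history
most_searched = max(history, key=history.count)  # most frequent element
assert most_searched == "Knuth"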
def insert():
    author = request.form.get('author')
    affiliation = request.form.get('aff')
    citedby = request.form.get('num_citations')
    title = request.form.get('name')
    year = request.form.get('pub_year')
    journal = request.form.get('j_name')
    pub_url = request.form.get('pub_url')
    db = DB()
    result = db.insert_article(author, affiliation, citedby, title, year,
                               pub_url, journal)
    if result is False:
        return jsonify({"status": "fail"})
    return jsonify({"status": "success"})
def update():
    by = int(request.form.get('choice'))
    article_id = request.form.get('id')  # renamed from `id` to avoid shadowing the builtin
    value = request.form.get('value')
    db = DB()
    result = 0
    if by == 0:
        result = db.update_article_on_citedby(article_id, value)
    elif by == 1:
        result = db.update_article_on_puburl(article_id, value)
    elif by == 2:
        result = db.update_article_on_journal(article_id, value)
    if result is False:
        return jsonify({"status": "fail"})
    return jsonify({"status": "success"})
def processOne(delivery_tag, params, channel, connection):
    mariadb_connection = DB(host='db_dict', port=3306,
                            user=os.environ['MYSQL_USER'],
                            password=os.environ['MYSQL_PASSWORD'],
                            database='crack_it')
    mariadb_connection.connect()
    print(" ")  # blank line for log clarity
    print("processing:")
    print(params["v"])
    dbactioncount = [0]
    filename = os.path.join(tmpDirectory, params["v"])
    # Check that the file exists to avoid errors
    if os.path.isfile(filename):
        # latin-1 decodes any utf-8 byte stream, but may mangle characters
        # if we edit the stream (see the official docs)
        linecount = wccount(filename)
        try:
            with open(filename, 'r', encoding="latin-1") as file:
                print("Processing file: " + filename)
                hashes, passwords = analyzeFile(file, params["w"], linecount)
                for item_hash in hashes:
                    dbactioncount[0] += sendHash(item_hash, params["s"], mariadb_connection)
                    commitIfNecessary(mariadb_connection, dbactioncount)
                for item_password in passwords:
                    dbactioncount[0] += registerPassword(*item_password, params["s"], mariadb_connection)
                    commitIfNecessary(mariadb_connection, dbactioncount)
                print("LAST COMMIT")
                mariadb_connection.commit()
        except ValueError:
            print("Nothing was found in this file. Discarding it...")
        finally:
            print("Deleting source file")
            os.remove(filename)
            print("ACK-ING the message")
    else:
        print("FILE DOES NOT EXIST...")
        mariadb_connection.commit()
        print("Discarding (ACK-ING) the message")
    ack_callback = functools.partial(ack_message, channel, delivery_tag)
    connection.add_callback_threadsafe(ack_callback)
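# NOTE: commitIfNecessary() is not defined in this file. The sketch below is a
# guess at what the call sites imply, assuming it commits once the shared
# action counter reaches a batch threshold; the threshold value is an assumption.
# `dbactioncount` is a one-element list so the callee can reset it in place.
COMMIT_EVERY = 1000  # assumed batch size

def commitIfNecessary(connection, count):
    if count[0] >= COMMIT_EVERY:
        connection.commit()  # flush pending inserts/updates in one transaction
        count[0] = 0         # reset the shared counter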
def processOne(delivery_tag, rabbitMQ_data_array, channel, connection):
    db = DB(host='db_dict', port=3306,
            user=os.environ['MYSQL_USER'],
            password=os.environ['MYSQL_PASSWORD'],
            database='crack_it')
    db.connect()

    # Prepare the requests
    print("------------------------------Start------------------------------")
    check_bdd = "SELECT str FROM hash WHERE str = %s AND clear IS NOT NULL"
    update_bdd_hash = "UPDATE hash SET clear = %s WHERE str = %s"
    insert_bdd_clear = "INSERT INTO dict (password) VALUES (%s) ON DUPLICATE KEY UPDATE seen=seen+1"
    # TODO: change this query so that 'seen' is not incremented when the entry
    # comes from the same repo (see the parser)
    insert_bdd_clear_notAhash = "INSERT INTO dict (password, seen) VALUES (%s, %s) ON DUPLICATE KEY UPDATE seen=seen+%s"
    get_origin_hash_seen = "SELECT count(*) FROM origin_hash WHERE item = %s"
    move_origin_data = ("INSERT INTO origin_dict (srckey, item) "
                        "SELECT srckey, (SELECT id FROM dict WHERE password = %s) "
                        "FROM origin_hash WHERE item = %s")
    delete_old_origin_data = "DELETE FROM origin_hash WHERE item = %s"
    delete_old_hash = "DELETE FROM hash WHERE id = %s"
    get_hash_ID = "SELECT id FROM hash WHERE str = %s"

    # Has this hash already been cracked?
    hash = rabbitMQ_data_array['value']
    cursor = db.query(check_bdd, (hash,))
    result = cursor.fetchone()
    print("Processing hash: " + hash)
    hashId = db.query(get_hash_ID, (hash,)).fetchall()[0][0]
    os.system("touch cracked.txt")
    if result is None:
        hash_presence_in_bdd = False
        success_token = False
    else:
        print("Hash already exists!")
        hash_presence_in_bdd = True
        success_token = True

    notAHash = 0
    if not hash_presence_in_bdd:
        # Try every candidate hashcat mode for this hash
        for hashTypesNumber in rabbitMQ_data_array['possibleHashTypes']:
            print("------------------------------NewTry------------------------------")
            hashType = str(hashTypesNumber)
            # Write the hash to the file hate_crack expects
            with open('hash.txt', 'w') as hash_file:
                print(hash, file=hash_file)
            print("Hashtype: " + hashType)
            try:
                crack = subprocess.check_output(
                    ["hate_crack/hate_crack.py", "hash.txt", hashType],
                    input="2\n".encode(), stderr=subprocess.STDOUT, shell=False)
                # check_output returns bytes: decode before matching lines
                for line in crack.decode("latin-1").splitlines():
                    if "Hash-value exception" in line or "Separator unmatched" in line:
                        notAHash += 1
                        print("[hate_crack] [ERROR] " + line)
                    print("[hate_crack] " + line)
            except subprocess.CalledProcessError as e:
                print("Hashcat failed:")
                print(e.output)
            # Success: hate_crack writes recovered passwords to hash.txt.out
            if path.isfile("hash.txt.out") and path.getsize("hash.txt.out") > 0:
                print("------------------------------Success------------------------------")
                success_token = True
                with open("hash.txt.out", "r") as cracked:
                    password_data = cracked.readline().split(":")
                    password_clear = password_data[1]
                # Insert the clear password into the dictionary
                cursor = db.query(insert_bdd_clear, (password_clear,))
                db.commit()
                print("Dictionary has been updated ...\nAdded: " + password_clear)
                cursor = db.query(update_bdd_hash, (cursor.lastrowid, hash))
                db.commit()
                print(cursor.rowcount, "Linked hash to dict value.")
                os.remove("hash.txt.out")
            os.remove("hash.txt")

        if notAHash == len(rabbitMQ_data_array['possibleHashTypes']):
            # Every mode rejected the input: it is probably a clear password
            print("Not a hash! This is probably a password! Saving in DB.")
            print("Old hash ID: " + str(hashId))
            cursor = db.query(get_origin_hash_seen, (hashId,))
            count = cursor.fetchall()[0][0]
            cursor = db.query(insert_bdd_clear_notAhash, (hash, count, count))
            cursor = db.query(move_origin_data, (hash, hashId))
            cursor = db.query(delete_old_origin_data, (hashId,))
            cursor = db.query(delete_old_hash, (hashId,))
            db.commit()
            print("Done")
        else:
            print("==============")
            print("Not going to save in DB.")
            print("Errors: " + str(notAHash))
            print("Hash types: " + str(len(rabbitMQ_data_array['possibleHashTypes'])))
            print("Hash in question: " + hash)

    # Erase the cracked.txt file
    os.system("rm cracked.txt 2> /dev/null")
    if not success_token:
        print("Hash not decrypted")
    print("------------------------------End------------------------------")
    # ACK the message thread-safely on the connection's own thread
    ack_callback = functools.partial(ack_message, channel, delivery_tag)
    connection.add_callback_threadsafe(ack_callback)
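# NOTE: ack_message() is not defined in this file. A minimal sketch following
# the standard pika pattern for acking from a worker thread (the exact body
# used in this project may differ):
def ack_message(channel, delivery_tag):
    if channel.is_open:
        channel.basic_ack(delivery_tag)
    # else: the channel is closed and the message will be redelivered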
    # Collect the listing pages to crawl (signature reconstructed from the call below)
    def get_more_url(self, soup):
        links = soup.find_all('div', class_='pager_container')
        urls = ['https://www.lagou.com/zhaopin/Java/?labelWords=label']
        for link in links:
            href = link.find('a', {'class': 'page_no', 'data-index': '2'})
            url = href['href'].encode('utf-8')
            for n in range(2, 31):
                # swap the page number into the paginated url (fixed prefix length)
                new_url = url[:35] + str(n) + '/'
                urls.append(new_url)
        return urls

if __name__ == "__main__":
    root_url = "https://www.lagou.com/zhaopin/Java/?labelWords=label"
    spider = Spider()
    content = spider.deal_url(root_url)
    soup = BeautifulSoup(content, "html.parser", from_encoding="utf-8")
    lists = spider.get_more_url(soup)
    for page_url in lists:
        content = spider.deal_url(page_url)
        print 'Crawling link: ' + page_url
        soup = BeautifulSoup(content, "html.parser", from_encoding="utf-8")
        datas = spider.parse(soup)
        db = DB()
        for data in datas:
            print data['address']
            db.importDb(data['salary'], data['job'], data['address'],
                        data['require'], data['company'])
def main():
    db = DB(host='db_dict', port=3306,
            user=os.environ['MYSQL_USER'],
            password=os.environ['MYSQL_PASSWORD'],
            database='crack_it')
    db.connect()

    # Retry until RabbitMQ is reachable
    success = False
    while not success:
        try:
            connection = pika.BlockingConnection(
                pika.ConnectionParameters(host='rabbitmq'))
            channel = connection.channel()
            success = True
        except pika.exceptions.AMQPConnectionError:
            success = False
            print("Failed to connect to RabbitMQ ... Retrying in 5 seconds.")
            time.sleep(5)

    # Create the exchange if it does not already exist
    channel.exchange_declare(exchange='files', exchange_type='fanout')
    result = channel.queue_declare(queue='parser_files_queue',
                                   exclusive=False, auto_delete=False)
    queue_name = result.method.queue
    # Bind the queue to the files exchange
    channel.queue_bind(exchange='files', queue=queue_name)

    # ========================================================================
    # QUERY INIT
    _SQL = """SELECT * FROM source"""
    # QUERY EXECUTE
    cursor = db.query(_SQL)
    sources = cursor.fetchall()
    print("Before loop")
    for row in sources:
        # Call the module the row is asking for (value: row[2]) in the crawlers
        # dict, which is a registry of all modules, and pass it the url from
        # the DB row; the result is an array of urls.
        urls, newsourcehint = crawlers[row[2]](row[1], row[4])
        cursor = db.query(
            "UPDATE source SET sourceHint = %s WHERE idsource = %s;",
            (newsourcehint, row[0]))
        db.commit()  # we could batch the commits, but is it really worth it here?
        for itemToParse in urls:
            if itemToParse is None:
                continue
            datafiles = pre_parsers[row[2]](itemToParse, cacheDirectory, tmpDirectory)
            for datafile in datafiles:
                print("Sending one file.")
                # Assemble a JSON message to easily combine the values:
                # m => module to use, v => data file
                message = json.dumps({
                    "m": row[2],
                    "s": itemToParse,
                    "v": datafile,
                    "w": row[5]
                })
                # Send the message through RabbitMQ using the files exchange
                channel.basic_publish(exchange='files', routing_key='',
                                      body=message)
    # Close the connection to RabbitMQ
    connection.close()
    print("Crawler done !")
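# NOTE: crawlers and pre_parsers are registries mapping the module name stored
# in row[2] to callables; their real definitions are not shown here. A minimal
# sketch of the registration pattern main() relies on -- the module name,
# function names, and bodies below are illustrative assumptions:
def example_crawler(url, source_hint):
    # fetch the source and return (items_to_parse, new_source_hint)
    return [url], source_hint

def example_pre_parser(item, cache_dir, tmp_dir):
    # stage `item` into tmp_dir and return the names of the data files produced
    return []

crawlers = {"example": example_crawler}
pre_parsers = {"example": example_pre_parser}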
        str_all = self.deal_str(str_new)
        deal_all = self.deal_all(str_all)
        for new in deal_all:
            if new != '':
                dis.append(new)
        return dis

    # Drop the empty strings from the tuple
    def deal_str(self, str_new):
        new = []
        for s in str_new:
            if s != '':
                new.append(s)
        return new

    # Build the array of values to process
    def deal_all(self, str_all):
        if len(str_all) == 1:
            return str_all
        else:
            # expand "low-high" bounds into the full range of values
            return range(int(str_all[0]), int(str_all[1]) + 1, 1)

if __name__ == "__main__":
    db = DB()
    result = db.get_salary()
    anl = Analyse()
    salary = anl.deal_result(result)
    d = collections.Counter(salary)
    for a in d:
        print str(a) + ":" + str(d[a])
def update():
    db = DB()
    update_data = json.loads(request.form.get('data'))
    status = db.update_article(update_data)
    db.close()
    return jsonify({"code": status})
def insert():
    db = DB()
    insert_data = json.loads(request.form.get('data'))
    status = db.insert_article(insert_data)
    db.close()
    return jsonify({"code": status})
from mysqldb import DB

db = DB()
def index():
    db = DB()
    temp = url_for('static', filename='style.css')
    return render_template('webpage.html', titles="search engine")
def __init__(self):
    DB.__init__(self)
#!/usr/bin/python3
from __future__ import generators
import sys
import os
from mysqldb import DB
import time
from pathlib import Path
from shutil import copyfile

# Connect to the db
db = DB(host='db_dict', port=3306,
        user=os.environ['MYSQL_USER'],
        password=os.environ['MYSQL_PASSWORD'],
        database='crack_it')
db.connect()

# Select all clear passwords in the db, most frequently seen first
select_password_clear = "SELECT password FROM dict ORDER BY seen DESC"
cursor = db.query(select_password_clear)

# Create the new dictionary in txt format
Path('/dict/new_dict.txt').touch()

# Fetch 1000 passwords at a time from the db and yield them one by one
# until there are no passwords left
def PasswordIterator(cursor, arraysize=1000):
    while True:
        passwords = cursor.fetchmany(arraysize)
        if not passwords:
            break
        for password in passwords:
            yield password
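# NOTE: the rest of the script is not shown above. A minimal sketch of how the
# iterator could then be drained into the file created earlier, assuming one
# password per line; the encoding and the row shape (1-tuples from fetchmany)
# are assumptions:
with open('/dict/new_dict.txt', 'w', encoding='latin-1') as dict_file:
    for (password,) in PasswordIterator(cursor):
        dict_file.write(password + '\n')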