def selects_mongo(self, host, port, conn, name_file, number_loops, db, collection,
                  value_eq, value_neq, value_many, value_contains):
    mongo = Mongo()
    # Reuse the given connection if provided, otherwise open a new one
    if not conn:
        conn = mongo.create_connexion(host, port)
    db = conn[db]
    print(conn)
    print(db)
    times = []
    print("connected")
    for i in range(number_loops):
        print("Iteration " + str(i))
        t = self.aux_selects_mongo(db, collection, value_eq, value_neq,
                                   value_many, value_contains)
        times.append(t)
    print(times)
    # TODO: save the results
    conn.close()
    # Write mongo results
    try:
        with open("/results/" + name_file + ".json", "w") as f:
            json.dump(times, f)
    except Exception as e:
        print(e)
        print("error saving results in " + name_file + ".json")
def main(argv):
    parameters = Parameters(argv)
    hostname = parameters.getHostname()
    port = parameters.getPort()
    dbname = parameters.getDBName()
    language_1, language_2 = parameters.getLanguage()
    collection = parameters.getCollection()
    filexml_1 = parameters.getInputFile_1()
    filexml_2 = parameters.getInputFile_2()
    type_corpus = parameters.getType()

    print('Using parameters of configuration: ')
    print('- Host : ', hostname)
    print('- Port : ', port)
    print('- Coll : ', collection)
    print('- DBase: ', dbname)
    print('- XML1 : ', filexml_1)
    print('- XML2 : ', filexml_2)

    database = Mongo(hostname, dbname, collection)

    dic_content_1 = OrderedDict()
    parserxml_1 = XML(filexml_1, language_1)
    dic_content_1 = parserxml_1.getContent()
    size_1 = len(dic_content_1)
    del parserxml_1

    dic_content_2 = OrderedDict()
    parserxml_2 = XML(filexml_2, language_2)
    dic_content_2 = parserxml_2.getContent()
    size_2 = len(dic_content_2)
    del parserxml_2

    counter = 1
    if size_1 == size_2:
        # As both files come from WebAligner, they must have the same number of documents
        for id_order in dic_content_1:
            id_file_1 = dic_content_1[id_order]['id_file']
            language_1 = dic_content_1[id_order]['language']
            content_1 = dic_content_1[id_order]['content']
            id_file_2 = dic_content_2[id_order]['id_file']
            language_2 = dic_content_2[id_order]['language']
            content_2 = dic_content_2[id_order]['content']
            if database.exists(language_1, id_file_1):
                if not database.exists(language_2, id_file_2):
                    database.insertInExisting(language_1, id_file_1,
                                              language_2, id_file_2, content_2)
            else:
                if database.exists(language_2, id_file_2):
                    database.insertInExisting(language_2, id_file_2,
                                              language_1, id_file_1, content_1)
                else:
                    database.insertNewData(language_1, id_file_1, content_1,
                                           language_2, id_file_2, content_2,
                                           type_corpus, counter)
            counter += 1
    else:
        # Files have a different number of documents, so they are not aligned
        print('\nError: Files not aligned. Please align them with WebAligner.')
def select_test_thread_host():
    s = SelectTest()
    m = Mongo()
    c = m.create_connexion(host_no__mongo_cluster, port_no__mongo_cluster)
    db1 = c.arkis
    threads_select_test(s, db1)


# select_test_thread_host()
# select_test_thread_cluster()
def create_full_index():
    with open('config.json') as data_file1:
        data1 = json.load(data_file1)
    mongo = Mongo()
    host = data1['host_multitenant']
    port = int(data1['port_multitenant'])
    db_name = 'arkis'
    number_tenants = data1['number_tenants']
    for user in range(number_tenants):
        print(user)
        # Each tenant runs on its own consecutive port
        conn = mongo.create_connexion(host, port + user)
        conn.arkis.documents.create_index([('blob', pymongo.TEXT)])
        print("created full text index")
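# Hedged usage sketch (not part of the original code): once the index above
# exists, the 'blob' field can be searched with pymongo's standard $text
# operator. The host/port/term parameters are illustrative.
def search_blobs(host, port, term):
    conn = Mongo().create_connexion(host, port)
    return list(conn.arkis.documents.find({"$text": {"$search": term}}))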
def restore_dictionary(self, db=None, filename=None):
    # Avoid Mongo() as a default argument: defaults are evaluated once at
    # definition time, which would open a connection on import
    if db is None:
        db = Mongo()
    if filename is None:
        words = db.get_words()
        for word in words:
            self.register_word(word['word'], score=word['score'])
    else:
        self.dictionary = FileSave.load(filename)
def save_dictionary(self, db=None, filename=None):
    if db is None:
        db = Mongo()
    if filename is None:
        for word, score in self.dictionary.items():
            db.save_word(word, score)
    else:
        FileSave.save(self.dictionary, filename)
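# Hedged usage sketch mirroring the calls in the clustering scripts below;
# the topic, pickle path, and Mongo address/name are taken from those scripts.
dico = Dictionary('computing')
dico.restore_dictionary(filename="data/raw_dictionary.pkl")  # load from file
dico.save_dictionary(db=Mongo(address="mongodb://localhost:27018", name="kw2"))  # persist to Mongo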
class BaseHandler(web.RequestHandler):
    executor = ThreadPoolExecutor(200)
    class_name = f"Fake{adapter}Resource"
    adapter_module = importlib.import_module(f"{adapter}")

    def __init__(self, application, request, **kwargs):
        super().__init__(application, request, **kwargs)
        self.json = {}
        with contextlib.suppress(ValueError):
            self.json: dict = json.loads(self.request.body)
        self.instance = getattr(self.adapter_module, self.class_name)()
        self.db = Mongo()
        self.ban_yellow_nazi()

    def ban_yellow_nazi(self):
        if self.db.is_user_blocked(self.get_current_user()):
            self.set_status(HTTPStatus.FORBIDDEN, "You don't deserve it.")
            real_ip = AntiCrawler(self).get_real_ip()
            AntiCrawler(self).imprisonment(real_ip)

    def write_error(self, status_code, **kwargs):
        if status_code in [
            HTTPStatus.FORBIDDEN,
            HTTPStatus.INTERNAL_SERVER_ERROR,
            HTTPStatus.UNAUTHORIZED,
            HTTPStatus.NOT_FOUND,
        ]:
            self.write(str(kwargs.get('exc_info')))

    def data_received(self, chunk):
        pass

    def get_current_user(self) -> str:
        username = self.get_secure_cookie("username") or b""
        return username.decode("u8")
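# A minimal sketch of serving BaseHandler with Tornado; the route and port
# below are assumptions, not taken from the original project.
from tornado import ioloop

if __name__ == "__main__":
    app = web.Application([(r"/api/resource", BaseHandler)])
    app.listen(8888)  # hypothetical port
    ioloop.IOLoop.current().start()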
def mongo_check_data(name_collection, name_test):
    # CONNECT AND GET COLLECTION
    name_database = "arkis"
    mongo = Mongo()
    conn = mongo.create_connexion(host_resilience_mongo, port_resilience_mongo)
    database = mongo.create_database(conn, name_database)
    mongo.create_collection(database, name_collection)
    # GET COUNT AND GET REAL COUNT
    try:
        r = mongo.get_all_data(database[name_collection])
        real_count = r.count()
        count = get_count(name_test)
        res = {'real_count': real_count, 'count': count}
        mongo.write_results(res, name_test)
        print(real_count)
        print(count)
    except Exception as e:
        print(e)
        print("Problem getting data")
def insert_mongo(self, host, port, database, one, conn, name_test, user, pwd):
    # sleep(15)
    inserts_time_one = 0
    delete_table_time = 0
    create_database_time = 0
    create_table_time = 0
    close_connexion_time = 0
    create_connexion_time = 0
    time_mongo_start = time()
    mongo = Mongo()
    # Create connexion
    try:
        create_connexion_start = time()
        # Reuse the given connection if provided, otherwise open a new one
        if not conn:
            conn = mongo.create_connexion(host, port, user, pwd)
        create_connexion_end = time()
        create_connexion_time = create_connexion_end - create_connexion_start
        print("connected")
    except Exception as e:
        print(e)
        print("problem connecting to mongo")
def __init__(self):
    super().__init__()
    self.log = self.get_logger("Inspector")
    self.log.debug("Starting Repo Check Inspector...")
    self.pedant = Pedant()
    self.mq = RabbitMq(environ["RMQ_HOST"], environ["RMQ_USER"], environ["RMQ_PASSWORD"])
    self.log.debug("Inspection queue connection established.")
    self.sql = MySql(environ["MYSQL_HOST"], environ["MYSQL_USER"], environ["MYSQL_PASSWORD"])
    self.log.debug("MySQL server connection established.")
    self.mongo = Mongo(environ["MONGO_HOST"])
    self.log.debug("MongoDB server connection established.")
    self.idle_interval = int(environ["INSPECTOR_IDLE_INTERVAL"])
    self.doc_exts = [  # file extensions that are inspected
        "html",
        "md",
    ]
    self.third_party_paths = [  # ignored
        "/vendor/",
    ]
    self.log.debug("Starting Inspector...")
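# The constructor above reads its whole configuration from the environment;
# a hedged local example (every value and the script name are illustrative):
#   RMQ_HOST=localhost RMQ_USER=guest RMQ_PASSWORD=guest \
#   MYSQL_HOST=localhost MYSQL_USER=root MYSQL_PASSWORD=secret \
#   MONGO_HOST=localhost INSPECTOR_IDLE_INTERVAL=60 python inspector.py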
def mongo_test(name_collection, name_test, type_insert, type_test):
    name_database = "arkis"
    mongo = Mongo()
    # CONNECT
    conn = mongo.create_connexion(host_resilience_mongo, port_resilience_mongo)
    # GET OR CREATE DATABASE
    database = mongo.create_database(conn, name_database)
    # CLEAN DATABASE
    try:
        mongo.delete_collection(database[name_collection])
    except Exception as e:
        print(e)
        print("problem connecting to mongo")
def main():
    questions = """
    How many total Characters are there?
    How many of each specific subclass?
    How many total Items?
    How many of the Items are weapons? How many are not?
    How many Items does each character have? (Return first 20 rows)
    How many Weapons does each character have? (Return first 20 rows)
    On average, how many Items does each Character have?
    On average, how many Weapons does each character have?
    """
    questions = questions.split("\n")[1:-1]

    user = os.getenv("DB_USER")
    password = os.getenv("DB_PASSWORD")
    db = "rpg"
    collection = "rpg_collection"
    rpg = Mongo(user, password, db, collection)

    print(questions[0].strip())
    print(rpg.count())
    print("\n")

    print(questions[1].strip())
    subclasses = ['fighter', 'mage', 'cleric', 'thief']
    for subclass in subclasses:
        count = rpg.count({subclass: {"$exists": True}})
        print(f"{subclass}: {count}")
    print("\n")

    print(questions[2].strip())
    print(rpg.count_distinct("inventory"))
    print("\n")

    print(questions[3].strip())
    print(list(rpg.query({"inventory.power": {"$exists": True}},
                         {"inventory.name": 1, "inventory.power": 1})))
def main(argv):
    parameters = Parameters(argv)
    hostname = parameters.getHostname()
    port = parameters.getPort()
    dbname = parameters.getDBName()
    language_1, language_2 = parameters.getLanguage()
    collection = parameters.getCollection()
    fileinput_1 = parameters.getInputFile_1()
    fileinput_2 = parameters.getInputFile_2()
    type_corpus = parameters.getType()

    print('Using parameters of configuration: ')
    print('- Host : ', hostname)
    print('- Port : ', port)
    print('- Coll : ', collection)
    print('- DBase: ', dbname)
    print('- File1: ', fileinput_1)
    print('- File2: ', fileinput_2)

    database = Mongo(hostname, dbname, collection)

    id_file_1 = (fileinput_1.split('/'))[-1]
    id_file_2 = (fileinput_2.split('/'))[-1]

    try:
        file_1 = codecs.open(fileinput_1, 'r', 'utf-8')
    except IOError:
        print('ERROR: System cannot open the ' + fileinput_1 + ' file')
        sys.exit(2)
    try:
        file_2 = codecs.open(fileinput_2, 'r', 'utf-8')
    except IOError:
        print('ERROR: System cannot open the ' + fileinput_2 + ' file')
        sys.exit(2)

    # Sentences indexed by the number of the line: number_line = _id (sentence)
    line_number = 1
    lines_2 = file_2.readlines()
    for counter, content_1 in enumerate(file_1):
        content_2 = lines_2[counter]
        if not database.exists(language_1, id_file_1) and not database.exists(language_2, id_file_2):
            database.insertNewData(language_1, id_file_1, content_1,
                                   language_2, id_file_2, content_2,
                                   type_corpus, line_number)
        else:
            if database.existsSentence(language_1, id_file_1, line_number):
                if not database.existsSentence(language_2, id_file_2, line_number):
                    database.insertInExistingSentence(language_1, id_file_1,
                                                      language_2, id_file_2,
                                                      content_2, line_number)
            else:
                if database.existsSentence(language_2, id_file_2, line_number):
                    database.insertInExistingSentence(language_2, id_file_2,
                                                      language_1, id_file_1,
                                                      content_1, line_number)
                else:
                    database.insertNewSentence(language_1, id_file_1, content_1,
                                               language_2, id_file_2, content_2,
                                               line_number)
        if line_number % 1000 == 0:
            print('Indexing line: ', line_number)
        line_number += 1
#! /usr/bin/env python
# coding: utf-8
from Mongo import Mongo

db = Mongo()

from MyCrypto import MyCrypto
from cursesmenu import *
from cursesmenu.items import *
import time
import getpass
import os
from terminaltables import AsciiTable
from halo import Halo

# global variables
menu = []
logged_user = []


def get_password():
    while True:
        # The password check was credential-scrubbed in the source ("******");
        # the masked span is left as-is.
        password = getpass.getpass("Account password:"******"password"]):
            return password
        else:
            print("Incorrect password! Try again...")


def register():
class SchoolApatment:
    __doc__ = '''A Python crawler that scrapes provinces, universities,
    university colleges, and university dormitory information, and saves
    the data to MongoDB.'''

    def __init__(self):
        self.session = Session()
        # placeholder connection string and database name
        self.m = Mongo('database connection', 27017, 'database name')

    # University information
    def university(self):
        # Fetch the data
        data = self.session.get(
            'https://www.wooyang.ml/data/allunivlist.txt').content
        # Convert the str to a list
        data = eval(data)
        for i in data:
            # Store the data in the matching collection
            self.m.insert(collection='country',
                          data={'id': i['id'], 'country_name': i['name']})
            for j in i['univs']:
                self.m.insert(collection='university',
                              data={'id': j['id'], 'university_name': j['name']})

    # College information
    def college(self):
        # Pull the needed records from the database
        for university in self.m.find(collection='university'):
            college_list = []
            # Fetch the data
            data = self.session.get(
                url='http://www.renren.com/GetDep.do?id=%d' % university['id'])
            soup = BeautifulSoup(data.text, 'lxml')
            for j in soup.stripped_strings:
                # skip the '院系' ("college/department") header string
                if j.encode('utf-8') != '院系':
                    college_list.append(j)
            self.m.insert(collection='college',
                          data={'id': university['id'], 'college': college_list})

    # Dormitory information
    def apartment(self):
        # Set the Cookie
        self.session.headers.update(
            {'Cookie': 't=0e10866787e0a7fc1ac190a4d60c4f9e2'})
        for university in self.m.find(collection='university'):
            apartment_list = []
            data = self.session.post(url='http://www.renren.com/GetDorm.do',
                                     data={'id': university['id']})
            soup = BeautifulSoup(data.text, 'lxml')
            for j in soup.stripped_strings:
                # skip the '宿舍' ("dormitory") header string
                if j.encode('utf-8') != '宿舍':
                    apartment_list.append(j)
            self.m.insert(collection='apartment',
                          data={'id': university['id'], 'college': apartment_list})
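# Hedged usage sketch: college() and apartment() read the 'university'
# collection that university() populates, so the stages run in this order.
# The entry-point guard is an assumption, not from the original file.
if __name__ == '__main__':
    spider = SchoolApatment()
    spider.university()
    spider.college()
    spider.apartment()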
from Crawler import Crawler
from Mongo import Mongo
from ClusteringAlgorithms.Indexer import Indexer
from ClassificationAlgorithms.Dictionary import Dictionary
from FileSave import save
from ClusteringAlgorithms.RecursiveKMeansClustering import RecursiveKMeansClustering
import keywords as kw
from ClusteringAlgorithms.string_tools import get_intersection, get_extra_words

keywords = kw.primary_keywords
db_name = "prod1"
db_address = "mongodb://localhost:27018"
indexing_interval = 2 * 3600  # seconds
window_range = 24  # hours

db = Mongo(address=db_address, name=db_name)
crawler = Crawler(db, keywords=keywords, verbose=False)
indexer = Indexer(db=db, hour_range=window_range)
clustering_method = RecursiveKMeansClustering()
dico = Dictionary('computing')
dico.restore_dictionary(filename="data/raw_dictionary.pkl")

detected_vulnerabilities = []


def process_cluster(cluster, verbose=False):
    is_new_vulnerability = True
    for stored_cluster in detected_vulnerabilities:
            if self.verbose:
                print('Tweet dropped : ' + str(tweet))
        except Exception:
            print('Processing of a tweet failed')

    def run(self):
        try:
            print("Beginning of crawling")
            if self.keywords is None:
                self.stream.get_sample()
            else:
                self.stream.get_filter(self.keywords)
        except KeyboardInterrupt:
            if self.store_to_file:
                save(self.tweets, self.filename)
            print("\nEnd of crawling")
            if self.verbose:
                print(str(self.count) + " tweets added this session")
                print(str(self.db.number_of_tweets()) + " tweets in the database")


if __name__ == '__main__':
    keywords = kw.primary_keywords
    db = Mongo(address="mongodb://localhost:27018", name="kw2")
    crawler = Crawler(db=db, keywords=keywords, extended_keywords=kw.all_keywords)
    crawler.run()
from Mongo import Mongo
from ClassificationAlgorithms.Dictionary import Dictionary
from ClusteringAlgorithms.Indexer import Indexer
from ClusteringAlgorithms.RecursiveKMeansClustering import RecursiveKMeansClustering
from statistics import show_statistics_of_clusters as statistics

# Main script
# It first loads tweets from either a Mongo database or a file,
# then creates the clusters with a given clustering method.
# Optionally, it can load a dictionary from a file and classify the clusters.
# Finally, it describes the clusters, shows some statistics, and saves the
# clusters and the dictionary.

use_dictionary = False
from_db = True

if from_db:
    # load indexer with tweets via db
    indexer = Indexer(Mongo(name="kw2"))
else:
    # load indexer with tweets from file
    indexer = Indexer(filename="data/tweets_extensive_db.pkl")

# create clustering method
clustering_method = RecursiveKMeansClustering()
# launch clustering process
indexer.create_clusters(clustering_method, print_progress=True)

# create raw dictionary from file
dico = Dictionary('computing')
if use_dictionary:
    dico.restore_dictionary(filename="dictionary.pkl")

# Classify from dictionary
# -*- coding: utf-8 -*-
from random import choice
from numpy import array, dot, random
from Perceptron import Perceptron
from Mongo import Mongo
import pymongo
import datetime
import bson
import time
import csv

if __name__ == "__main__":
    start = time.time()
    border = 5  # scale of gray that is considered black
    unit_step = lambda x: 0 if x < 0 else 1
    mongo = Mongo()
    training_data = [
        (array([0, 0, 1]), 0),
        (array([0, 1, 1]), 1),
        (array([1, 0, 1]), 1),
        (array([1, 1, 1]), 1),
    ]
    test_data = array([0, 0, 1])
    print('Creating perceptrons...')
    results = []
    perceptrons = []
    for i in range(10):
        print('Creating perceptron to number', i)
        perceptrons.append(Perceptron(100, 783))
def main(argv):
    parameters = Parameters(argv)
    hostname = parameters.getHostname()
    port = parameters.getPort()
    dbname = parameters.getDBName()
    language_1, language_2 = parameters.getLanguage()
    collection = parameters.getCollection()
    input_folder = parameters.getInputFolder()
    type_corpus = parameters.getType()

    print('Using parameters of configuration: ')
    print('- Host : ', hostname)
    print('- Port : ', port)
    print('- Coll : ', collection)
    print('- DBase: ', dbname)
    print('- Input: ', input_folder)

    database = Mongo(hostname, dbname, collection)

    try:
        root, dirs, files = next(os.walk(input_folder + language_1 + '/'))[:3]
    except IOError:
        print('ERROR: It was not possible to open the ' + input_folder + language_1 + '/ folder')
        sys.exit(2)

    for corpus_file in files:
        # if (corpus_file ~ "/~/$"):
        # skip editor backup files
        if '.txt~' not in corpus_file:
            print('Working on file: ' + corpus_file)
            id_file_1 = language_1 + '_' + corpus_file[0:-4]
            id_file_2 = language_2 + '_' + corpus_file[0:-4]
            try:
                file_1 = codecs.open(input_folder + language_1 + '/' + corpus_file, 'r', 'utf-8')
            except IOError:
                print('ERROR: System cannot open the ' + root + corpus_file + ' file')
                sys.exit(2)
            try:
                file_2 = codecs.open(input_folder + language_2 + '/' + corpus_file, 'r', 'utf-8')
            except IOError:
                print('ERROR: System cannot open the ' + root + '../' + language_2 + '/' + corpus_file + ' file')
                sys.exit(2)

            # Sentences indexed by the number of the line: number_line = _id (sentence)
            line_number = 1
            lines_2 = file_2.readlines()
            content_1 = ''
            content_2 = ''
            for counter, line in enumerate(file_1):
                if re.match('(^<)', line):
                    if content_1 != '' and content_2 != '':
                        if not database.exists(language_1, id_file_1) and not database.exists(language_2, id_file_2):
                            database.insertNewData(language_1, id_file_1, content_1,
                                                   language_2, id_file_2, content_2,
                                                   type_corpus, line_number)
                        else:
                            if database.existsSentence(language_1, id_file_1, line_number):
                                if not database.existsSentence(language_2, id_file_2, line_number):
                                    database.insertInExistingSentence(language_1, id_file_1,
                                                                      language_2, id_file_2,
                                                                      content_2, line_number)
                            else:
                                if database.existsSentence(language_2, id_file_2, line_number):
                                    database.insertInExistingSentence(language_2, id_file_2,
                                                                      language_1, id_file_1,
                                                                      content_1, line_number)
                                else:
                                    database.insertNewSentence(language_1, id_file_1, content_1,
                                                               language_2, id_file_2, content_2,
                                                               line_number)
                        line_number += 1
                        content_1 = ''
                        content_2 = ''
                        if line_number % 100 == 0:
                            print('Indexing line: ', line_number)
                else:
                    content_1 += line
                    content_2 += lines_2[counter]
            file_1.close()
            file_2.close()
def main(argv):
    parameters = Parameters(argv)
    hostname = parameters.getHostname()
    port = parameters.getPort()
    dbname = parameters.getDBName()
    language_1, language_2 = parameters.getLanguage()
    collection = parameters.getCollection()
    input_folder = parameters.getInputFolder()
    type_corpus = parameters.getType()

    print('Using parameters of configuration: ')
    print('- Host : ', hostname)
    print('- Port : ', port)
    print('- Coll : ', collection)
    print('- DBase: ', dbname)
    print('- Input: ', input_folder)

    database = Mongo(hostname, dbname, collection)

    try:
        root, dirs, files = next(os.walk(input_folder))[:3]
    except IOError:
        print('ERROR: It was not possible to open the ' + input_folder + ' folder')
        sys.exit(2)

    name_folder = (input_folder.split('/'))[-2]

    # Pair up the '<name>_<lang>.snt' files by their shared base name
    dic_files = {}
    for corpus_file in files:
        print('Working on file: ' + corpus_file)
        if not re.match('~$', corpus_file):
            id_file = corpus_file[0:-7]
            language = corpus_file[-6:-4]
            if id_file not in dic_files:
                dic_files[id_file] = {'language_1': language}
            else:
                dic_files[id_file]['language_2'] = language

    counter = 1
    for filename in dic_files:
        language_1 = dic_files[filename]['language_1']
        language_2 = dic_files[filename]['language_2']
        id_file_1 = name_folder + '_' + filename + '_' + language_1
        id_file_2 = name_folder + '_' + filename + '_' + language_2
        try:
            file_1 = codecs.open(input_folder + filename + '_' + language_1 + '.snt', 'r', 'utf-8')
        except IOError:
            print('ERROR: System cannot open the ' + input_folder + filename + '_' + language_1 + '.snt file')
            sys.exit(2)
        try:
            file_2 = codecs.open(input_folder + filename + '_' + language_2 + '.snt', 'r', 'utf-8')
        except IOError:
            print('ERROR: System cannot open the ' + input_folder + filename + '_' + language_2 + '.snt file')
            sys.exit(2)

        content_1 = ''
        for line in file_1:
            # if line.strip():
            content_1 += line
        content_2 = ''
        for line in file_2:
            # if line.strip():
            content_2 += line

        if database.exists(language_1, id_file_1):
            if not database.exists(language_2, id_file_2):
                database.insertInExisting(language_1, id_file_1,
                                          language_2, id_file_2, content_2)
        else:
            if database.exists(language_2, id_file_2):
                database.insertInExisting(language_2, id_file_2,
                                          language_1, id_file_1, content_1)
            else:
                database.insertNewData(language_1, id_file_1, content_1,
                                       language_2, id_file_2, content_2,
                                       type_corpus, counter)
        counter += 1
from Mongo import Mongo

mongo = Mongo('127.0.0.1', 'escola')

while True:
    menu = """
    1) Insert
    2) Update
    3)
    4)
    5)

    Enter an option: """
    op = int(input(menu))
    if op == 1:
        nome = input('Enter the name: ')
        email = input('Enter the email: ')
        mongo.inserir(nome, email)
        print("Inserted successfully")
    elif op == 2:
        email = input('Enter the email: ')
        nome = input('Enter the new name: ')
        mongo.alterar(email=email, nome=nome)
    elif op == 3:
        pass
# -*- coding: utf-8 -*-
import csv
import pymongo
import datetime
import bson
import time
from numpy import array, dot, random
from Perceptron import Perceptron
from Mongo import Mongo

HEADER = ['Número', 'positive', 'negative', 'false positive', 'false negative',
          'accuracy', 'precision', 'recall', 'true_negative']

if __name__ == "__main__":
    mongo = Mongo()
    myfile = open('result.csv', 'w', newline='')
    wr = csv.writer(myfile, delimiter=',', quoting=csv.QUOTE_ALL)
    data = mongo.getResults()
    i = 0
    for row in data:
        # Every 10 rows belong to a different learning rate (eta)
        if i == 0:
            wr.writerow(['eta 0.15', '', '', '', '', '', '', '', ''])
            wr.writerow(HEADER)
        elif i == 10:
            wr.writerow(['eta 0.2', '', '', '', '', '', '', '', ''])
            wr.writerow(HEADER)
        elif i == 20:
            wr.writerow(['eta 0.3', '', '', '', '', '', '', '', ''])
            wr.writerow(HEADER)
        elif i == 30:
            wr.writerow(['eta 0.4', '', '', '', '', '', '', '', ''])
            wr.writerow(HEADER)
        wr.writerow([int(row['number']), int(row['positive']), int(row['negative']),
                     int(row['false_positive']), int(row['false_negative']),
                     float(row['result']['accuracy']), row['result']['precision'],
                     row['result']['recall'], row['result']['true_negative']])
        i += 1
    myfile.close()
help="Base de donnée", metavar='Base', dest="selectBase") groupWarning.add_argument("--all", help="Toute les collections", action="store_true", dest="all") groupWarning.add_argument("-C", help="selectionne la collection", metavar="COLLECTION", dest="selectCollection") args = parser.parse_args() mongo = Mongo(HOST, PORT, USER, PASSWORD) if (args.base): mongo.showDataBase() if (args.collection): mongo.showListCollection(args.selectBase) if (args.selectCollection and args.selectBase and args.printCollection): print(mongo.printCollection(args.selectBase, args.selectCollection)) if (args.selectBase and args.all and args.export): mongo.exportAllCollectioni(args.selectBase) if (args.selectBase and args.selectCollection and args.export): mongo.exportOneCollection(args.selectBase, args.selectCollection)
for line in range(1, ws.max_row + 1):
    name = ws.cell(line, 1).value
    link = ws.cell(line, 2).value
    data[name] = link

template = {
    "username": "******",
    "ip": "127.0.0.1",
    "date": "",
    "browser": "cli",
    "content": "",
    "resource_id": 234,
    "type": "parent"
}

col = Mongo().db["comment"]

share_doc = {
    "status": 1.0,
    "info": "OK",
    "data": {
        "info": {
            "id": 234,
            "cnname": "网友分享",  # "shared by users"
            "enname": "",
            "aliasname": "",
            "channel": "share",
            "channel_cn": "",
            "area": "",
            "show_type": "",
            "expire": "1610401225",
            "views": 0
    'hector.gif': ':hector_bird:',
    'duh.gif': ':duh:',
    'mellow.gif': ':mellow:',
    'blush.gif': ':blush:',
    'despair.gif': ':despair:',
    'thumbsdown.gif': ':thumbsdown:',
    'leno.gif': ':leno:',
    'gasp.gif': ':o'
}

# setup html2markdown
md_handler = html2text.HTML2Text()
md_handler.body_width = 0

# setup mongo client for creating and storing fields
mon = Mongo()
category = 6  # destination forum number: 6 for Cythera Web, 7 for Chronicles

# setup crawler to reuse same connection
# http = urllib3.PoolManager()

# Use a list as a stack to visit pages in the following order:
# add topics, then the topic's next link, then the forum's next-page link.
# On a stack, this visits all forum pages, then all topic pages, and all
# topics in reverse order (essentially starting at the oldest).
url_stack = []

# Load from "crawl.txt" file list
# with open('crawl.txt') as f:
#     for line in f:
#         if not line.strip() == '':
#             url_stack.append(line.strip())