def main(script, seed_url, limit=-1):
    limit = int(limit)
    import crawler
    crawler.init(seed_url)
    crawler.run(limit)
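Since the first parameter is the script name, this entry point looks meant to be unpacked directly from sys.argv. A minimal launcher sketch under that assumption (the __main__ guard and the unpacking are not part of the original snippet):

import sys

if __name__ == '__main__':
    # Assumed invocation: sys.argv is (script, seed_url[, limit]),
    # which lines up with main(script, seed_url, limit=-1) above.
    main(*sys.argv)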
def main():
    """Tester entry point."""
    crawler = VulnerabilitiesCrawler('http://web/')
    # Show the obtained information
    print("\n \n >>>>>>>" + crawler.run())
def _click():
    split_url = entry_url_entered.get().split('/')
    is_url_valid = False
    for w in split_url:
        if w == 'question':
            is_url_valid = True
            continue
        if is_url_valid:
            app_ctx.question_id = w
            break
    if not is_url_valid:
        label_download_result['text'] = '网址有误,请重新输入。'  # "The URL is invalid, please re-enter it."
    else:
        label_download_result['text'] = '正在缓存...'  # "Caching..."
        crawler.run(app_ctx)
        label_download_result['text'] = '缓存完成'  # "Caching finished."
def test_crawl_echo_server(self):
    urls = [
        f'http://{self.client.host}:{self.client.port}/{id_}'
        for id_ in range(10)
    ]
    results = crawler.run(urls=urls, limit=5, loop=self.loop)
    self.assertSetEqual(set(results), set(map(str, range(10))))
def hello_world():
    hotels_data = []
    start_date = request.args.get('start_date', '').replace('-', '')
    final_date = request.args.get('final_date', '').replace('-', '')
    db_data = run(start_date, final_date, 0)
    for hotel in db_data:
        hotels_data.append(hotel.to_json())
    return jsonify(hotels_data)
def run():
    try:
        optlist, args = getopt.getopt(sys.argv[1:], OPTIONS, LONG_OPTIONS)
    except getopt.GetoptError as e:
        print str(e)
        sys.exit(2)

    # Defaults
    thread_num = 10
    output_path = "pics"
    capacity = -1
    begin_url = "22mm.cc"

    for option, value in optlist:
        if option == "-h":
            usage()
            sys.exit()
        elif option == "-n":
            try:
                thread_num = int(value)
            except Exception as e:
                print "command error"
                usage()
                sys.exit(2)
        elif option == "-o":
            output_path = value
        elif option == "-l":
            try:
                capacity = int(value)
            except Exception as e:
                print "command error"
                usage()
                sys.exit(2)
        # elif option == "-s":
        #     begin_url = value

    try:
        if not os.path.isdir(output_path):
            os.makedirs(output_path)
    except Exception as e:
        print "invalid path"
        sys.exit(2)

    type, host = urllib.splittype(begin_url)
    if not type:
        begin_url = "http://" + begin_url
    crawler.run(begin_url, capacity, output_path, thread_num)
def crawl():
    """A function that uses src/crawler.py."""
    vprint("Crawler started", "green", "info")
    for site in sitesFromReverse:
        vprint("Crawling -> " + site, "yellow", "info")
        sites = crawler.run(site)
        if type(sites) != list:
            vprint(sites, "red", "err")
            continue
def post(request):
    if request.method == "POST":
        user_url = request.POST.get("user-code")
        print("사용자 요청 URL : " + user_url)  # URL requested by the user
        timetable_list = crawler.run(user_url)
        if timetable_list is None:
            return render(request, 'error-url.html', {'user_url': user_url})
        info_list = crawler.lecture_list(timetable_list)
        # print(info_list)

        this_monday = datetime.date(2019, 9, 2)
        while this_monday < datetime.date(2019, 12, 31):
            for i in timetable_list:  # for each course
                for j in i.dates:  # for each class date
                    d = this_monday + datetime.timedelta(days=int(j['day']))
                    hour, min = crawler.calc_time(int(j['start_time']))
                    s = datetime.time(hour, min, 0)
                    hour, min = crawler.calc_time(int(j['end_time']))
                    e = datetime.time(hour, min, 0)
                    start = datetime.datetime.combine(d, s)
                    end = datetime.datetime.combine(d, e)
                    Event(owner=request.user, title=i.name, place=i.place,
                          start=start, end=end, is_from_timetable=True).save()
            this_monday += datetime.timedelta(days=7)
        return render(request, 'check-info.html', {'info_list': info_list})
        # form = PostForm(request.POST)
        # if form.is_valid():
        #     lotto = form.save(commit=False)
        #     lotto.generate()
        #     return redirect('connect-everytime')
    else:
        return render(request, "error.html")
def getFormData(formData):
    dft = False
    bft = False
    if formData.getvalue('startingSite'):
        startingSite = formData.getvalue('startingSite')
    else:
        startingSite = None
    if formData.getvalue('crawlLimit'):
        crawlLimit = int(formData.getvalue('crawlLimit'))
    else:
        crawlLimit = 0
    if formData.getvalue('kWord'):
        kWord = formData.getvalue('kWord')
    else:
        kWord = None
    return dft, bft, startingSite, crawlLimit, kWord

###############################################################################
# Main Function
###############################################################################
# Get the data from the user-submitted form and set the global variable values
formData = cgi.FieldStorage()
dft, bft, startingSite, crawlLimit, kWord = getFormData(formData)

# Output results
print "Content-Type: text/html;charset=utf-8\n"
print crawler.run(startingSite, bft, crawlLimit, kWord)
buffer_handler = BufferingLogHandler(capacity=500)
setup_logging(
    log_folder_path=LOG_PATH,
    log_level=LOG_LEVEL,
    external_lib_log_level="WARNING",
    rotate_logger_configuration=ROTATE_CONFIG,
    extra_handlers=[buffer_handler])

logger = logging.getLogger('Max')
logger.debug("Deb")
logger.info('inf')
logger.warning('warn')
logger.error('errrrr')
logger.fatal('fat!')

print("buffer returned: ", list(map(str, buffer_handler.flush())))
exit()

# NOTE: everything below the exit() call above is unreachable leftover debug code.
import crawler
crawler.run()
print("%" * 50)
logging.getLogger().handlers[2].flush()

setup_logging(log_folder_path=LOG_PATH,
              log_level=LOG_LEVEL,
              external_lib_log_level="WARNING",
              rotate_logger_configuration=ROTATE_CONFIG)
import crawler
def analysis():
    logger.info("I'm working..." + time.ctime())
    run()
def getFormData(formData):
    dft = False
    bft = False
    if formData.getvalue('startingSite'):
        startingSite = formData.getvalue('startingSite')
    else:
        startingSite = None
    if formData.getvalue('crawlLimit'):
        crawlLimit = int(formData.getvalue('crawlLimit'))
    else:
        crawlLimit = 0
    if formData.getvalue('kWord'):
        kWord = formData.getvalue('kWord')
    else:
        kWord = None
    return dft, bft, startingSite, crawlLimit, kWord

###############################################################################
# Main Function
###############################################################################
# Get the data from the user-submitted form and set the global variable values
formData = cgi.FieldStorage()
dft, bft, startingSite, crawlLimit, kWord = getFormData(formData)

# Testing code
print "Content-Type: text/html;charset=utf-8\n"
print crawler.run("https://www.talkingpointsmemo.com", dft, 5, None)
def runcrawler():
    connect = psycopg2.connect(db_config)
    crawler.run(connect)
    connect.close()
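For context, psycopg2.connect() accepts either a libpq connection string or keyword arguments, so db_config is presumably defined elsewhere as one of those. A hypothetical sketch (all values are placeholders, not taken from the original code):

# Hypothetical DSN string that psycopg2.connect(db_config) would accept.
db_config = "dbname=crawler_db user=crawler password=secret host=localhost port=5432"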
import argparse

import crawler
import phones_parser

parser = argparse.ArgumentParser()
parser.add_argument('--tasks',
                    help='path to file with tasks list (one url per line)',
                    required=True)
parser.add_argument('-n',
                    type=int,  # ensure the limit is an int even when given on the command line
                    help='number of simultaneously performed requests',
                    default=100)
args = parser.parse_args()

with open(args.tasks) as f:
    urls = [url.strip() for url in f.readlines() if url.strip()]

pages = crawler.run(urls, limit=args.n)
phones = {phone for phone in phones_parser.parse(page for page in pages)}
for phone in phones:
    print(phone)