Python runの例、crawler.run Pythonの例

コード例 #1

0

ファイルを表示

ファイル: main.py プロジェクト: kapv89/slither

def main(script, seed_url, limit = -1):
  """Initialise the crawler with ``seed_url`` and run it with ``limit``.

  ``script`` is accepted but not used in this function.  ``limit`` is
  converted with ``int()`` before the crawler module is imported.
  """
  max_count = int(limit)

  # Imported inside the function, after the limit has been converted.
  import crawler

  crawler.init(seed_url)
  crawler.run(max_count)

コード例 #2

0

ファイルを表示

ファイル: main.py プロジェクト: vott/sqlitester

def main():
    """Tester entry point: crawl ``http://web/`` and print the outcome.

    The value returned by ``run()`` is concatenated onto a banner string,
    so it has to be a ``str``.
    """
    report = VulnerabilitiesCrawler('http://web/').run()
    # Show the obtained information
    print("\n \n >>>>>>>" + report)

コード例 #3

0

ファイルを表示

def _click():
    """Click handler: extract the question id from the URL and run the crawler.

    The text of ``entry_url_entered`` is split on ``/``.  The first segment
    that follows a ``question`` segment is taken as the question id and
    stored in ``app_ctx.question_id`` before ``crawler.run(app_ctx)`` is
    called.  When no non-empty id follows ``question`` (for example the URL
    ends in ``question`` or ``question/``), an error message is shown in
    ``label_download_result`` and the crawler is not started, so it never
    runs with a stale or empty id.
    """
    segments = entry_url_entered.get().split('/')

    question_id = None
    seen_marker = False
    for segment in segments:
        if segment == 'question':
            seen_marker = True
        elif seen_marker:
            # First segment after the "question" marker is the id.
            question_id = segment
            break

    # Covers both "no marker / nothing after it" (None) and an empty segment.
    if not question_id:
        label_download_result['text'] = '网址有误，请重新输入。'
        return

    app_ctx.question_id = question_id
    label_download_result['text'] = '正在缓存...'
    crawler.run(app_ctx)
    label_download_result['text'] = '缓存完成'

コード例 #4

0

ファイルを表示

ファイル: test_crawler.py プロジェクト: walkingpendulum/phones-crawler

    def test_crawl_echo_server(self):
        """Crawl ten numbered URLs and expect their ids back as strings.

        NOTE(review): presumably the server behind ``self.client`` echoes
        the path segment of each request -- confirm against the fixture.
        """
        ids = range(10)

        targets = []
        for id_ in ids:
            targets.append(
                f'http://{self.client.host}:{self.client.port}/{id_}'
            )

        fetched = crawler.run(urls=targets, limit=5, loop=self.loop)

        # Order is irrelevant; only the set of returned values is compared.
        self.assertSetEqual(set(fetched), {str(id_) for id_ in ids})

コード例 #5

0

ファイルを表示

ファイル: routes.py プロジェクト: hamzabouissi/Trivago

def hello_world():
    """Return the hotels for the requested date range as a JSON response.

    ``start_date`` and ``final_date`` are read from ``request.args``
    (defaulting to an empty string) and have their ``-`` characters removed
    before being passed to ``run`` together with a literal ``0``.  Each
    returned item is serialised with its ``to_json()`` method.
    """
    params = request.args
    start_date = params.get('start_date', '').replace('-', "")
    final_date = params.get('final_date', '').replace('-', "")

    hotels_data = [
        hotel.to_json() for hotel in run(start_date, final_date, 0)
    ]
    return jsonify(hotels_data)

コード例 #6

0

ファイルを表示

ファイル: main.py プロジェクト: yeyuexia/mm_crawler

def run():
    """Parse the command-line options and start the crawler.

    Options handled here:
      -h  print usage and exit
      -n  number of threads (integer, default 10)
      -o  output directory (default "pics"); created when it does not exist
      -l  capacity handed to the crawler (integer, default -1)

    The start URL is fixed to "22mm.cc"; "http://" is prepended when the URL
    carries no scheme.  The process exits with status 2 on an unparsable
    command line, a non-integer ``-n``/``-l`` value, or an output directory
    that cannot be created.
    """
    try:
        optlist, _ = getopt.getopt(sys.argv[1:], OPTIONS, LONG_OPTIONS)
    except getopt.GetoptError as e:
        print(str(e))
        sys.exit(2)

    thread_num = 10
    output_path = "pics"
    capacity = -1
    begin_url = "22mm.cc"
    for option, value in optlist:
        if option == "-h":
            usage()
            sys.exit()
        elif option in ("-n", "-l"):
            # Both options take an integer; int() on a string can only fail
            # with ValueError.
            try:
                number = int(value)
            except ValueError:
                print("command error")
                usage()
                sys.exit(2)
            if option == "-n":
                thread_num = number
            else:
                capacity = number
        elif option == "-o":
            output_path = value
        # NOTE: a "-s" option overriding begin_url is intentionally not
        # handled; the start URL always keeps its default.

    try:
        if not os.path.isdir(output_path):
            os.makedirs(output_path)
    except OSError:
        print("invalid path")
        sys.exit(2)

    # Only the scheme part matters here; the remainder is discarded.
    scheme, _ = urllib.splittype(begin_url)
    if not scheme:
        begin_url = "http://" + begin_url
    crawler.run(begin_url, capacity, output_path, thread_num)

コード例 #7

0

ファイルを表示

ファイル: main.py プロジェクト: yeyuexia/mm_crawler

def run():
    """Parse the command-line options and start the crawler.

    Options handled here:
      -h  print usage and exit
      -n  number of threads (integer, default 10)
      -o  output directory (default "pics"); created when it does not exist
      -l  capacity handed to the crawler (integer, default -1)

    The start URL is fixed to "22mm.cc"; "http://" is prepended when the URL
    carries no scheme.  The process exits with status 2 on an unparsable
    command line, a non-integer ``-n``/``-l`` value, or an output directory
    that cannot be created.
    """
    try:
        optlist, _ = getopt.getopt(sys.argv[1:], OPTIONS, LONG_OPTIONS)
    except getopt.GetoptError as e:
        print(str(e))
        sys.exit(2)

    thread_num = 10
    output_path = "pics"
    capacity = -1
    begin_url = "22mm.cc"
    for option, value in optlist:
        if option == "-h":
            usage()
            sys.exit()
        elif option in ("-n", "-l"):
            # Both options take an integer; int() on a string can only fail
            # with ValueError.
            try:
                number = int(value)
            except ValueError:
                print("command error")
                usage()
                sys.exit(2)
            if option == "-n":
                thread_num = number
            else:
                capacity = number
        elif option == "-o":
            output_path = value
        # NOTE: a "-s" option overriding begin_url is intentionally not
        # handled; the start URL always keeps its default.

    try:
        if not os.path.isdir(output_path):
            os.makedirs(output_path)
    except OSError:
        print("invalid path")
        sys.exit(2)

    # Only the scheme part matters here; the remainder is discarded.
    scheme, _ = urllib.splittype(begin_url)
    if not scheme:
        begin_url = "http://" + begin_url
    crawler.run(begin_url, capacity, output_path, thread_num)

コード例 #8

0

ファイルを表示

def crawl():
	'''Run the crawler from src/crawler.py on each site in sitesFromReverse.

	A crawl result that is not a list is reported through vprint as an
	error and that site is skipped.
	'''
	vprint(" Perayap dimulai ", "green", "info")

	for site in sitesFromReverse:
		vprint(" Merangkak -> " + site, "yellow", "info")

		sites = crawler.run(site)

		# NOTE(review): this checks the crawl result rather than the loop
		# variable (which is always the input site) -- confirm that this
		# matches the intended behaviour.
		if not isinstance(sites, list):
			vprint(sites, "red", "err")
			continue

コード例 #9

0

ファイルを表示

ファイル: views.py プロジェクト: SmartWebService/SmartStudyCalendar

def post(request):
    """Create calendar events from the timetable behind the submitted URL.

    Only ``POST`` requests are processed; any other method renders
    ``error.html``.  The URL is read from the ``user-code`` form field and
    handed to ``crawler.run``.  When that returns ``None``,
    ``error-url.html`` is rendered.  Otherwise one ``Event`` is saved per
    timetable slot for every week whose Monday lies between 2019-09-02
    (inclusive) and 2019-12-31 (exclusive), and ``check-info.html`` is
    rendered with the lecture list.
    """
    if request.method != "POST":
        return render(request, "error.html")

    # NOTE(review): a POST without "user-code" leaves user_url as None and
    # the concatenation below raises TypeError -- confirm whether such a
    # request should render an error page instead.
    user_url = request.POST.get("user-code")
    print("사용자 요청 URL : " + user_url)

    timetable_list = crawler.run(user_url)
    if timetable_list is None:
        return render(request, 'error-url.html', {'user_url': user_url})
    info_list = crawler.lecture_list(timetable_list)

    one_week = datetime.timedelta(days=7)
    last_day = datetime.date(2019, 12, 31)
    this_monday = datetime.date(2019, 9, 2)
    while this_monday < last_day:
        for lecture in timetable_list:  # per lecture
            for slot in lecture.dates:  # per scheduled day
                # slot['day'] is used as a day offset from the week's Monday.
                day = this_monday + datetime.timedelta(days=int(slot['day']))

                start_hour, start_minute = crawler.calc_time(
                    int(slot['start_time']))
                end_hour, end_minute = crawler.calc_time(
                    int(slot['end_time']))

                start = datetime.datetime.combine(
                    day, datetime.time(start_hour, start_minute, 0))
                end = datetime.datetime.combine(
                    day, datetime.time(end_hour, end_minute, 0))

                Event(owner=request.user,
                      title=lecture.name,
                      place=lecture.place,
                      start=start,
                      end=end,
                      is_from_timetable=True).save()
        this_monday += one_week

    return render(request, 'check-info.html', {'info_list': info_list})

コード例 #10

0

ファイルを表示

ファイル: webcrawlProg.py プロジェクト: fredkontur/webcrawler

        dft = False
        bft = False
    
    if formData.getvalue('startingSite'):
        startingSite = formData.getvalue('startingSite')
    else:
        startingSite = None
    
    if formData.getvalue('crawlLimit'):
        crawlLimit = int(formData.getvalue('crawlLimit'))
    else:
        crawlLimit = 0
    
    if formData.getvalue('kWord'):
        kWord = formData.getvalue('kWord')
    else:
        kWord = None
    
    return dft, bft, startingSite, crawlLimit, kWord

###############################################################################
# Main Function
###############################################################################
# Read the submitted CGI form and unpack its values into module-level names.
formData = cgi.FieldStorage()
(dft, bft, startingSite, crawlLimit, kWord) = getFormData(formData)

# Emit the HTTP header first (the trailing "\n" plus print's own newline
# produce the blank line that ends the header section), then the crawler
# output as the response body.
print("Content-Type: text/html;charset=utf-8\n")
print(crawler.run(startingSite, bft, crawlLimit, kWord))

コード例 #11

0

ファイルを表示

    
    buffer_handler = BufferingLogHandler(capacity=500)
    setup_logging(
        log_folder_path=LOG_PATH, 
        log_level=LOG_LEVEL,
        external_lib_log_level="WARNING",
        rotate_logger_configuration=ROTATE_CONFIG,
        extra_handlers=[buffer_handler])
    
    logger = logging.getLogger('Max')
    logger.debug("Deb")
    logger.info('inf')
    logger.warning('warn')
    logger.error('errrrr')
    logger.fatal('fat!')
    print("buffer returned: ", list(map(str, buffer_handler.flush())))


    exit()
    import crawler
    crawler.run()

    print("%"*50)
    logging.getLogger().handlers[2].flush()


    setup_logging(log_folder_path=LOG_PATH, log_level=LOG_LEVEL, external_lib_log_level="WARNING",
                  rotate_logger_configuration=ROTATE_CONFIG)

    import crawler

コード例 #12

0

ファイルを表示

ファイル: main.py プロジェクト: XiaohuiYang/wx

def analysis():
    """Log an "I'm working..." message with the current time, then call ``run``."""
    stamp = time.ctime()
    logger.info("I'm working..." + stamp)
    run()

コード例 #13

0

ファイルを表示

ファイル: webcrawlProg2.py プロジェクト: peter7n/pavo-webcrawler

        dft = False
        bft = False
    
    if formData.getvalue('startingSite'):
        startingSite = formData.getvalue('startingSite')
    else:
        startingSite = None
    
    if formData.getvalue('crawlLimit'):
        crawlLimit = int(formData.getvalue('crawlLimit'))
    else:
        crawlLimit = 0
    
    if formData.getvalue('kWord'):
        kWord = formData.getvalue('kWord')
    else:
        kWord = None
    
    return dft, bft, startingSite, crawlLimit, kWord

###############################################################################
# Main Function
###############################################################################
# Read the submitted CGI form and unpack its values into module-level names.
formData = cgi.FieldStorage()
(dft, bft, startingSite, crawlLimit, kWord) = getFormData(formData)

# Testing code: the crawl uses a fixed site, a limit of 5 and no keyword;
# of the form values only dft is passed on.  The header's trailing "\n" plus
# print's own newline produce the blank line that ends the header section.
print("Content-Type: text/html;charset=utf-8\n")
print(crawler.run("https://www.talkingpointsmemo.com", dft, 5, None))

コード例 #14

0

ファイルを表示

def runcrawler():
    """Open a database connection, run the crawler with it, and close it.

    The connection is created from ``db_config`` and is closed even when
    ``crawler.run`` raises, so a failed crawl does not leak the connection.
    """
    connect = psycopg2.connect(db_config)
    try:
        crawler.run(connect)
    finally:
        connect.close()

コード例 #15

0

ファイルを表示

def analysis():
    """Log an "I'm working..." message with the current time, then call ``run``."""
    stamp = time.ctime()
    logger.info("I'm working..." + stamp)
    run()

コード例 #16

0

ファイルを表示

import argparse

import crawler
import phones_parser

# Command-line entry point: read the task URLs, crawl them, and print every
# distinct phone number extracted from the fetched pages.
parser = argparse.ArgumentParser()
parser.add_argument('--tasks',
                    help='path to file with tasks list (one url per line)',
                    required=True)
# type=int so that a value given on the command line reaches the crawler as
# an integer, exactly like the default does.
parser.add_argument('-n',
                    help='number of simultaneously performed requests',
                    type=int,
                    default=100)
args = parser.parse_args()

# Stream the task file line by line and drop blank lines.
with open(args.tasks) as f:
    stripped_lines = (line.strip() for line in f)
    urls = [url for url in stripped_lines if url]

pages = crawler.run(urls, limit=args.n)
# A set removes duplicate phone numbers found across pages.
phones = set(phones_parser.parse(page for page in pages))

for phone in phones:
    print(phone)