def compare_response_ratio(source_job: Job, target_job: Job) -> Result:
    """Compare request with response per item ratio"""
    # Requests-per-item ratio for each job, rounded to 2 decimals.
    s_ratio, t_ratio = (
        round(api.get_requests_count(job) / api.get_items_count(job), 2)
        for job in (source_job, target_job)
    )
    response_ratio_diff = helpers.ratio_diff(s_ratio, t_ratio)
    msg = f"Difference is {response_ratio_diff:.2%} - {s_ratio} and {t_ratio}"
    result = Result("Compare Responses Per Item Ratio")
    # >20% divergence is an error, >10% only a warning; below that, silence.
    if response_ratio_diff > 0.2:
        result.add_error(msg)
    elif response_ratio_diff > 0.1:
        result.add_warning(msg)
    return result
def check_response_ratio(job: Job) -> Result:
    """Report the requests-per-scraped-item ratio of a single job."""
    ratio = round(api.get_requests_count(job) / api.get_items_count(job), 2)
    result = Result("Responses Per Item Ratio")
    result.add_info(
        f"Number of responses / Number of scraped items - {ratio}"
    )
    return result
def compare_response_ratio(source_job: Job, target_job: Job) -> Result:
    """Compare request with response per item ratio.

    Computes the requests/items ratio of each job, and flags the pair as an
    error when the relative difference exceeds 20% or as a warning above 10%.

    :param source_job: the baseline job
    :param target_job: the job being compared against the baseline
    :return: a Result named "Compare Responses Per Item Ratio"
    """
    items_count1 = api.get_items_count(source_job)
    items_count2 = api.get_items_count(target_job)
    source_ratio = round(api.get_requests_count(source_job) / items_count1, 2)
    target_ratio = round(api.get_requests_count(target_job) / items_count2, 2)
    response_ratio_diff = helpers.ratio_diff(source_ratio, target_ratio)
    # Use the `%` format spec (as the sibling implementation of this check
    # does): the old "{}%".format(diff * 100) printed raw float noise such
    # as "15.000000000000002%".
    msg = (
        f"Difference is {response_ratio_diff:.2%} "
        f"- {source_ratio} and {target_ratio}"
    )
    result = Result("Compare Responses Per Item Ratio")
    if response_ratio_diff > 0.2:
        result.add_error(msg)
    elif response_ratio_diff > 0.1:
        result.add_warning(msg)
    return result
def job_summary_table(job) -> go.FigureWidget:
    """Build a two-column plotly table summarising one job's key stats.

    Rows cover job URL, state, close reason, item/error counts, runtime,
    request success ratio, crawling speed, Crawlera user, max memory usage
    and response status counts.

    :param job: a Scrapinghub job object with ``key`` and ``metadata``
    :return: a ``go.FigureWidget`` containing a single ``go.Table`` trace
    """
    job_url = f"{SH_URL}/{job.key}"
    job_state = api.get_job_state(job)
    job_close_reason = api.get_job_close_reason(job)
    no_of_scraped_items = api.get_items_count(job)
    no_of_errors = api.get_errors_count(job)
    # NOTE(review): the raw value is divided by 1000 here yet also passed to
    # helpers.ms_to_time below — confirm whether get_runtime_s returns
    # milliseconds or seconds; one of the two usages looks off-by-a-unit.
    job_runtime = api.get_runtime_s(job) / 1000
    run_time = helpers.ms_to_time(job_runtime)
    # Items per minute, matching the "[items/min]" row label below; the
    # previous code computed the inverse (minutes per item).
    crawling_speed = round(no_of_scraped_items / (job_runtime / 60), 3)
    request_success_ratio = round(
        api.get_requests_count(job) / float(no_of_scraped_items), 2)
    max_memusage = api.get_max_memusage(job)
    response_status_count = api.get_response_status_count(job)
    # Fall back to a placeholder when the job did not use Crawlera.
    crawlera_stat_value = api.get_crawlera_user(job) or "Not Used"
    job_stats_values = [
        "Job URL",
        "Spider State",
        "Spider Close Reason",
        "Number of Scraped Items",
        "Number of Errors",
        "Runtime",
        "Request Success Ratio [requests/scraped items]",
        "Crawling Speed [items/min]",
        "Crawlera user",
        "Max Memory Usage [Bytes]",
        "Response Status Count",
    ]
    # assumes get_response_status_count returns counts ordered as
    # 200, 301, 404, 503 — TODO confirm against the api helper
    response_status_cell = "".join(
        f"{code}: {count}<br>"
        for code, count in zip((200, 301, 404, 503), response_status_count)
    )
    stats_values = [
        f'<a href="{job_url}">{job_url}</a>',
        job_state,
        job_close_reason,
        no_of_scraped_items,
        no_of_errors,
        run_time,
        request_success_ratio,
        crawling_speed,
        crawlera_stat_value,
        max_memusage,
        response_status_cell,
    ]
    trace = go.Table(
        columnorder=[1, 2],
        columnwidth=[300, 200],
        header=dict(
            values=["<b>Job Stat</b>", "<b>Stat Value</b>"],
            fill=dict(color="gray"),
            align=["left"] * 5,
            font=dict(color="black", size=14),
            height=30,
        ),
        cells=dict(
            values=[job_stats_values, stats_values],
            fill=dict(color="lightgrey"),
            font=dict(color="black", size=12),
            height=25,
            align=["left"] * 5,
        ),
    )
    spider = job.metadata.get("spider")
    layout = go.Layout(
        title=f"Summary for spider {spider}",
        autosize=True,
        margin=dict(t=40, b=25, l=0, r=0),
        height=445,
    )
    return go.FigureWidget(data=[trace], layout=layout)