def test_happy(self) -> None:
    """Tests the happy path."""
    mock_overpass_sleep_called = False

    def mock_overpass_sleep() -> None:
        nonlocal mock_overpass_sleep_called
        mock_overpass_sleep_called = True

    result_from_overpass = "******"

    def mock_urlopen(_url: str, _data: Optional[bytes] = None) -> BinaryIO:
        buf = io.BytesIO()
        buf.write(result_from_overpass.encode('utf-8'))
        buf.seek(0)
        return buf

    with unittest.mock.patch("cron.overpass_sleep", mock_overpass_sleep):
        with unittest.mock.patch('urllib.request.urlopen', mock_urlopen):
            relations = get_relations()
            for relation_name in relations.get_active_names():
                if relation_name != "gazdagret":
                    relations.get_relation(relation_name).get_config().set_active(False)
            expected = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
            path = os.path.join(relations.get_workdir(), "streets-gazdagret.csv")
            os.unlink(path)
            cron.update_osm_streets(relations, update=True)
            mtime = os.path.getmtime(path)
            cron.update_osm_streets(relations, update=False)
            self.assertEqual(os.path.getmtime(path), mtime)
            self.assertTrue(mock_overpass_sleep_called)
            actual = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
            self.assertEqual(actual, expected)

def test_http_error(self) -> None:
    """Tests the case when we keep getting HTTP errors."""
    ctx = test_context.make_test_context()
    routes: List[test_context.URLRoute] = [
        test_context.URLRoute(url="https://overpass-api.de/api/status",
                              data_path="",
                              result_path="tests/network/overpass-status-happy.txt"),
        test_context.URLRoute(url="https://overpass-api.de/api/interpreter",
                              data_path="",
                              result_path=""),
    ]
    network = test_context.TestNetwork(routes)
    ctx.set_network(network)
    relations = areas.Relations(ctx)
    for relation_name in relations.get_active_names():
        if relation_name != "gazdagret":
            relations.get_relation(relation_name).get_config().set_active(False)
    expected = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
    cron.update_osm_streets(ctx, relations, update=True)
    # Make sure that in case we keep getting errors we give up at some stage and
    # leave the last state unchanged.
    actual = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    ctx = test_context.make_test_context()
    routes: List[test_context.URLRoute] = [
        test_context.URLRoute(url="https://overpass-api.de/api/status",
                              data_path="",
                              result_path="tests/network/overpass-status-happy.txt"),
        test_context.URLRoute(url="https://overpass-api.de/api/interpreter",
                              data_path="",
                              result_path="tests/network/overpass-streets-gazdagret.csv"),
    ]
    network = test_context.TestNetwork(routes)
    ctx.set_network(network)
    relations = areas.Relations(ctx)
    for relation_name in relations.get_active_names():
        if relation_name != "gazdagret":
            relations.get_relation(relation_name).get_config().set_active(False)
    expected = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
    path = os.path.join(relations.get_workdir(), "streets-gazdagret.csv")
    os.unlink(path)
    cron.update_osm_streets(ctx, relations, update=True)
    mtime = os.path.getmtime(path)
    cron.update_osm_streets(ctx, relations, update=False)
    self.assertEqual(os.path.getmtime(path), mtime)
    actual = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    with unittest.mock.patch('util.get_abspath', get_abspath):
        relations = get_relations()
        for relation_name in relations.get_active_names():
            if relation_name not in ("gazdagret", "ujbuda"):
                relations.get_relation(relation_name).get_config().set_active(False)
        config = webframe.get_config()
        expected = util.get_content(relations.get_workdir(),
                                    "street-housenumbers-reference-gazdagret.lst")
        os.unlink(os.path.join(relations.get_workdir(),
                               "street-housenumbers-reference-gazdagret.lst"))
        cron.update_ref_housenumbers(relations, config)
        actual = util.get_content(relations.get_workdir(),
                                  "street-housenumbers-reference-gazdagret.lst")
        self.assertEqual(actual, expected)
        # Make sure housenumber ref is not created for the streets=only case.
        ujbuda_path = os.path.join(relations.get_workdir(),
                                   "street-housenumbers-reference-ujbuda.lst")
        self.assertFalse(os.path.exists(ujbuda_path))

def test_happy(self) -> None:
    """Tests the happy path."""
    ctx = test_context.make_test_context()
    ctx.set_time(test_context.make_test_time())
    routes: List[test_context.URLRoute] = [
        test_context.URLRoute(url="https://overpass-api.de/api/status",
                              data_path="",
                              result_path="tests/network/overpass-status-happy.txt"),
        test_context.URLRoute(url="https://overpass-api.de/api/interpreter",
                              data_path="",
                              result_path="tests/network/overpass-stats.csv"),
    ]
    network = test_context.TestNetwork(routes)
    ctx.set_network(network)
    # Create a CSV that is definitely old enough to be removed.
    old_path = ctx.get_abspath("workdir/stats/old.csv")
    create_old_file(old_path)
    today = time.strftime("%Y-%m-%d")
    path = ctx.get_abspath("workdir/stats/%s.csv" % today)
    cron.update_stats(ctx, overpass=True)
    actual = util.get_content(path)
    self.assertEqual(actual, util.get_content("tests/network/overpass-stats.csv"))
    # Make sure that the old CSV is removed.
    self.assertFalse(os.path.exists(old_path))
    with open(ctx.get_abspath("workdir/stats/ref.count"), "r") as stream:
        num_ref = int(stream.read().strip())
        self.assertEqual(num_ref, 300)

def test_happy(self) -> None:
    """Tests the happy path."""
    relations = get_relations()
    for relation_name in relations.get_active_names():
        # gellerthegy is streets=no
        if relation_name not in ("gazdagret", "gellerthegy"):
            relations.get_relation(relation_name).get_config().set_active(False)
    path = os.path.join(relations.get_workdir(), "gazdagret-additional-streets.count")
    expected = "1"
    if os.path.exists(path):
        util.get_content(path)
        os.unlink(path)
    cron.update_additional_streets(relations, update=True)
    mtime = os.path.getmtime(path)
    cron.update_additional_streets(relations, update=False)
    self.assertEqual(os.path.getmtime(path), mtime)
    actual = util.get_content(path)
    self.assertEqual(actual, expected)
    # Make sure street stat is not created for the streets=no case.
    self.assertFalse(
        os.path.exists(
            os.path.join(relations.get_workdir(), "gellerthegy-additional-streets.count")))

def handle_static(request_uri: str) -> Tuple[bytes, str, List[Tuple[str, str]]]:
    """Handles serving static content."""
    tokens = request_uri.split("/")
    path = tokens[-1]
    extra_headers: List[Tuple[str, str]] = []
    if request_uri.endswith(".js"):
        content_type = "application/x-javascript"
        content = util.get_content(config.Config.get_workdir(), path, extra_headers)
        return content, content_type, extra_headers
    if request_uri.endswith(".css"):
        content_type = "text/css"
        content = util.get_content(config.get_abspath("static"), path, extra_headers)
        return content, content_type, extra_headers
    if request_uri.endswith(".json"):
        content_type = "application/json"
        content = util.get_content(os.path.join(config.Config.get_workdir(), "stats"), path, extra_headers)
        return content, content_type, extra_headers
    if request_uri.endswith(".ico"):
        content_type = "image/x-icon"
        content = util.get_content(config.get_abspath(""), path, extra_headers)
        return content, content_type, extra_headers
    return bytes(), "", extra_headers

def test_http_error(self) -> None:
    """Tests the case when we keep getting HTTP errors."""
    mock_overpass_sleep_called = False

    def mock_overpass_sleep() -> None:
        nonlocal mock_overpass_sleep_called
        mock_overpass_sleep_called = True

    with unittest.mock.patch("cron.overpass_sleep", mock_overpass_sleep):
        with unittest.mock.patch('urllib.request.urlopen', mock_urlopen_raise_error):
            relations = get_relations()
            for relation_name in relations.get_active_names():
                if relation_name != "gazdagret":
                    relations.get_relation(relation_name).get_config().set_active(False)
            expected = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
            cron.update_osm_streets(relations, update=True)
            self.assertTrue(mock_overpass_sleep_called)
            # Make sure that in case we keep getting errors we give up at some stage and
            # leave the last state unchanged.
            actual = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
            self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    refpath = config.get_abspath(os.path.join("refdir", "utcak_20190514.tsv"))
    relations = get_relations()
    relation_name = "gazdagret"
    relation = relations.get_relation(relation_name)
    expected = util.get_content(relations.get_workdir(), "streets-reference-gazdagret.lst")
    relation.write_ref_streets(refpath)
    actual = util.get_content(relations.get_workdir(), "streets-reference-gazdagret.lst")
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    relations = get_relations()
    relation_name = "gazdagret"
    relation = relations.get_relation(relation_name)
    result_from_overpass = "******"
    expected = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
    relation.get_files().write_osm_streets(result_from_overpass)
    actual = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    expected = util.get_content(config.get_abspath("workdir/street-housenumbers-reference-gazdagret.lst"))
    argv = ["", "gazdagret"]
    with unittest.mock.patch('sys.argv', argv):
        get_reference_housenumbers.main()
    actual = util.get_content(config.get_abspath("workdir/street-housenumbers-reference-gazdagret.lst"))
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    refdir = os.path.join(os.path.dirname(__file__), "refdir")
    refpath = os.path.join(refdir, "hazszamok_20190511.tsv")
    refpath2 = os.path.join(refdir, "hazszamok_kieg_20190808.tsv")
    relations = get_relations()
    relation_name = "gazdagret"
    expected = util.get_content(relations.get_workdir(),
                                "street-housenumbers-reference-gazdagret.lst")
    relation = relations.get_relation(relation_name)
    relation.write_ref_housenumbers([refpath, refpath2])
    actual = util.get_content(relations.get_workdir(),
                              "street-housenumbers-reference-gazdagret.lst")
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    relations = get_relations()
    relation_name = "gazdagret"
    relation = relations.get_relation(relation_name)
    expected = util.get_content(relations.get_workdir(), "gazdagret-streets.percent")
    ret = relation.write_missing_streets()
    todo_count, done_count, percent, streets = ret
    self.assertEqual(todo_count, 1)
    self.assertEqual(done_count, 4)
    self.assertEqual(percent, '80.00')
    self.assertEqual(streets, ['Only In Ref utca'])
    actual = util.get_content(relations.get_workdir(), "gazdagret-streets.percent")
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    with unittest.mock.patch('util.get_abspath', get_abspath):
        relations = get_relations()
        for relation_name in relations.get_active_names():
            # gellerthegy is streets=no
            if relation_name not in ("gazdagret", "gellerthegy"):
                relations.get_relation(relation_name).get_config().set_active(False)
        expected = util.get_content(relations.get_workdir(), "gazdagret-streets.percent")
        os.unlink(os.path.join(relations.get_workdir(), "gazdagret-streets.percent"))
        cron.update_missing_streets(relations)
        actual = util.get_content(relations.get_workdir(), "gazdagret-streets.percent")
        self.assertEqual(actual, expected)
        # Make sure street stat is not created for the streets=no case.
        self.assertFalse(os.path.exists(os.path.join(relations.get_workdir(), "gellerthegy-streets.percent")))

def test_happy(self) -> None:
    """Tests the happy path."""
    def get_abspath(path: str) -> str:
        if os.path.isabs(path):
            return path
        return os.path.join(os.path.dirname(__file__), path)

    with unittest.mock.patch('config.get_abspath', get_abspath):
        expected = util.get_content(get_abspath("workdir/street-housenumbers-reference-gazdagret.lst"))
        argv = ["", "gazdagret"]
        with unittest.mock.patch('sys.argv', argv):
            get_reference_housenumbers.main()
        actual = util.get_content(get_abspath("workdir/street-housenumbers-reference-gazdagret.lst"))
        self.assertEqual(actual, expected)

def thread():
    if util.validate_request_post(request.form) is not None:
        return jsonify(type='danger', text=util.validate_request_post(request.form))
    try:
        submission = util.r.submission(url=request.form['submission'])
    except:
        return jsonify(type='danger', text='That wasn\'t a reddit link, was it?')
    if not submission.url.startswith('https://www.reddit.com/r/'):
        body = util.get_content(submission.url)
    else:
        body = util.markdown(submission.selftext, output_format='html5')
    title = submission.title
    author = "[deleted]"
    if submission.author is not None:
        author = submission.author.name
    address = request.form['email']
    kindle_address = request.form['kindle_address']
    comments = None
    if request.form['comments'] == 'true':
        submission.comments.replace_more(limit=0)
        comments = util.get_comments(submission, request.form['comments_style'], author)
    attachment = render_template('comments.html', title=title, body=body, author=author, comments=comments)
    status = util.send_email(address, kindle_address, attachment, title)
    if status is None:
        return jsonify(type='success', text='Success!')
    else:
        return jsonify(type='warning', text='Uh oh! Something went wrong on our end')

def find_question_by_link(self, topic_url, count_id):
    content = get_content(topic_url, count_id)
    if content in ["FAIL", "NO FOUND"]:
        return 0
    soup = BeautifulSoup(content)
    questions = soup.findAll('div', attrs={'class': 'feed-item feed-item-hook question-item'})
    i = 0
    p_str = 'INSERT IGNORE INTO QUESTION (TOPIC_URL, NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)'
    anser_list = []
    time_now = int(time.time())
    for question in questions:
        # print question
        temp_question = question.find('a', attrs={'class': 'question_link'})
        answer_amount = question.find('meta', attrs={'itemprop': 'answerCount'})
        answer_amount = answer_amount.get('content')
        # answer_amount = answer_focus_amount[0].get_text()
        # focus_amount = answer_focus_amount[1].get_text()
        tem_text = temp_question.get_text()
        tem_id = temp_question.get('href')
        tem_id = tem_id.replace('/question/', '')
        anser_list = anser_list + [(topic_url, tem_text, int(tem_id), 0, int(answer_amount), 0, time_now, 0)]
    self.cursor.executemany(p_str, anser_list)
    return self.cursor.rowcount

def getUniversityStudent(self, inputid):
    nameList = []
    time_now = int(time.time())
    inputUrl = self.homepage + inputid + self.infopage
    tmpContent = get_content(inputUrl)
    soup = bs(tmpContent.text)
    time.sleep(1)
    divlabel = soup.find_all('div', 'tip')
    '''
    try:
        self.personalInfo = divlabel[0].next_sibling.get_text('|', strip=True)
        self.schoolInfo = divlabel[1].next_sibling.get_text()
        # mySchool = u'安徽医科大'
        mySchool = self.school
        if mySchool in self.schoolInfo:
            name, sex, hometown = impRe(self.personalInfo)
            self.nameList = self.nameList + [(name, 0, inputid, time_now, sex, hometown)]
    except:
        pass
    '''
    self.personalInfo = divlabel[0].next_sibling.get_text('|', strip=True)
    self.schoolInfo = divlabel[1].next_sibling.get_text()
    mySchool = self.school
    if mySchool in self.schoolInfo:
        name, sex, hometown = impRe(self.personalInfo)
        self.nameList = self.nameList + [(name, 0, inputid, time_now, sex, hometown)]

def run(self):
    url = "http://www.zhihu.com/people/" + self.zh_username + "/topics"
    content = get_content(url, 1)
    time_now = time.time()
    if content == "FAIL":
        print "Fail to open the url:" + url
        return
    else:
        soup = BeautifulSoup(content)
        results = soup.findAll('div', attrs={'class': 'zm-profile-section-main'})
        if results == None or len(results) == 0:
            print "No topic is found!"
            return
        for result in results:
            m = re.search(r'topic/(\d*)', str(result))
            if m == None:
                print "No matching topic"
                continue
            else:
                link_id = m.group(1)
            m = re.search(r'<strong>(.*?)</strong>', str(result))
            if m == None:
                print "No topic name!"
                continue
            else:
                name = m.group(1)
            print link_id, name
            sqlcmd = "INSERT IGNORE INTO TOPIC (NAME, LAST_VISIT, LINK_ID, ADD_TIME) VALUES (%s, %s, %s, %s)"
            self.cursor.execute(sqlcmd, (name, 0, link_id, time_now))

def handle_main_housenr_percent(relation: areas.Relation) -> Tuple[yattag.doc.Doc, str]:
    """Handles the house number percent part of the main page."""
    prefix = config.Config.get_uri_prefix()
    url = prefix + "/missing-housenumbers/" + relation.get_name() + "/view-result"
    percent = "N/A"
    if os.path.exists(relation.get_files().get_housenumbers_percent_path()):
        percent = util.get_content(relation.get_files().get_housenumbers_percent_path()).decode("utf-8")

    doc = yattag.doc.Doc()
    if percent != "N/A":
        date = get_last_modified(relation.get_files().get_housenumbers_percent_path())
        with doc.tag("strong"):
            with doc.tag("a", href=url, title=_("updated") + " " + date):
                doc.text(util.format_percent(percent))
        return doc, percent

    with doc.tag("strong"):
        with doc.tag("a", href=url):
            doc.text(_("missing house numbers"))
    return doc, "0"

def handle_main_street_percent(ctx: context.Context, relation: areas.Relation) -> Tuple[yattag.doc.Doc, str]:
    """Handles the street percent part of the main page."""
    prefix = ctx.get_ini().get_uri_prefix()
    url = prefix + "/missing-streets/" + relation.get_name() + "/view-result"
    percent = "N/A"
    if ctx.get_file_system().path_exists(relation.get_files().get_streets_percent_path()):
        percent = util.get_content(relation.get_files().get_streets_percent_path()).decode("utf-8")

    doc = yattag.doc.Doc()
    if percent != "N/A":
        date = get_last_modified(relation.get_files().get_streets_percent_path())
        with doc.tag("strong"):
            with doc.tag("a", href=url, title=tr("updated") + " " + date):
                doc.text(util.format_percent(percent))
        return doc, percent

    with doc.tag("strong"):
        with doc.tag("a", href=url):
            doc.text(tr("missing streets"))
    return doc, "0"

def handle_main_housenr_additional_count(ctx: context.Context, relation: areas.Relation) -> yattag.doc.Doc:
    """Handles the housenumber additional count part of the main page."""
    if not relation.get_config().should_check_additional_housenumbers():
        return yattag.doc.Doc()

    prefix = ctx.get_ini().get_uri_prefix()
    url = prefix + "/additional-housenumbers/" + relation.get_name() + "/view-result"
    additional_count = ""
    if ctx.get_file_system().path_exists(relation.get_files().get_housenumbers_additional_count_path()):
        path = relation.get_files().get_housenumbers_additional_count_path()
        additional_count = util.get_content(path).decode("utf-8").strip()

    doc = yattag.doc.Doc()
    if additional_count:
        date = get_last_modified(relation.get_files().get_housenumbers_additional_count_path())
        with doc.tag("strong"):
            with doc.tag("a", href=url, title=tr("updated") + " " + date):
                doc.text(tr("{} house numbers").format(additional_count))
        return doc

    with doc.tag("strong"):
        with doc.tag("a", href=url):
            doc.text(tr("additional house numbers"))
    return doc

def update_stats() -> None:
    """Performs the update of country-level stats."""
    # Fetch house numbers for the whole country.
    logging.info("update_stats: start, updating whole-country csv")
    query = util.get_content(config.get_abspath("data/street-housenumbers-hungary.txt"))
    statedir = config.get_abspath("workdir/stats")
    os.makedirs(statedir, exist_ok=True)
    today = time.strftime("%Y-%m-%d")
    csv_path = os.path.join(statedir, "%s.csv" % today)

    retry = 0
    while should_retry(retry):
        if retry > 0:
            logging.info("update_stats: try #%s", retry)
        retry += 1
        try:
            overpass_sleep()
            response = overpass_query.overpass_query(query)
            with open(csv_path, "w") as stream:
                stream.write(response)
            break
        except urllib.error.HTTPError as http_error:
            logging.info("update_stats: http error: %s", str(http_error))

    # Shell part.
    logging.info("update_stats: executing the shell part")
    subprocess.run([config.get_abspath("stats-daily.sh")], check=True)
    logging.info("update_stats: end")

def find_question_by_link(self, topic_url, count_id):
    content = get_content(topic_url, count_id)
    if content == "FAIL":
        return 0
    soup = BeautifulSoup(content)
    questions = soup.findAll('a', attrs={'class': 'question_link'})
    i = 0
    p_str = 'INSERT IGNORE INTO QUESTION (NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s)'
    anser_list = []
    time_now = int(time.time())
    for question in questions:
        tem_text = question.get_text()
        tem_id = question.get('href')
        tem_id = tem_id.replace('/question/', '')
        anser_list = anser_list + [(tem_text, int(tem_id), 0, 0, 0, time_now, 0)]
    self.cursor.executemany(p_str, anser_list)
    return self.cursor.rowcount

def find_question_by_link(self, topic_url, count_id):
    content = get_content(topic_url, count_id)
    if content == "FAIL":
        return 0
    # print content
    soup = BeautifulSoup(content)
    topic = soup.find("title").text
    topic = topic.replace(u" - 全部问题 - 知乎", "")
    print topic
    questions = soup.findAll("a", attrs={"class": "question_link"})
    i = 0
    p_str = "INSERT IGNORE INTO QUESTION (TOPIC, NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    anser_list = []
    time_now = int(time.time())
    for question in questions:
        tem_text = question.get_text()
        tem_id = question.get("href").replace("/question/", "")
        anser_list = anser_list + [(topic, tem_text, int(tem_id), 0, 0, 0, time_now, 0)]
    self.cursor.executemany(p_str, anser_list)
    return self.cursor.rowcount

def handle_main_street_additional_count(relation: areas.Relation) -> yattag.doc.Doc:
    """Handles the street additional count part of the main page."""
    prefix = config.Config.get_uri_prefix()
    url = prefix + "/additional-streets/" + relation.get_name() + "/view-result"
    additional_count = ""
    if os.path.exists(relation.get_files().get_streets_additional_count_path()):
        additional_count = util.get_content(relation.get_files().get_streets_additional_count_path()).decode("utf-8")

    doc = yattag.doc.Doc()
    if additional_count:
        date = get_last_modified(relation.get_files().get_streets_additional_count_path())
        with doc.tag("strong"):
            with doc.tag("a", href=url, title=_("updated") + " " + date):
                doc.text(_("{} streets").format(additional_count))
        return doc

    with doc.tag("strong"):
        with doc.tag("a", href=url):
            doc.text(_("additional streets"))
    return doc

def find_question_by_link(self, topic_url, count_id):
    content = get_content(topic_url, count_id)
    if content == "FAIL":
        return 0
    soup = BeautifulSoup(content)
    questions = soup.findAll('a', attrs={'class': 'question_link'})
    i = 0
    p_str = 'INSERT IGNORE INTO QUESTION (NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s)'
    anser_list = []
    time_now = int(time.time())
    for question in questions:
        tem_text = question.get_text()
        tem_id = question.get('href')
        tem_id = tem_id.replace('/question/', '')
        anser_list = anser_list + [(tem_text, int(tem_id), 0, 0, 0, time_now, 0)]
    self.cursor.executemany(p_str, anser_list)
    return self.cursor.rowcount

def test_happy(self) -> None:
    """Tests the happy path."""
    relations = get_relations()
    relation_name = "gazdagret"
    result_from_overpass = "******"
    result_from_overpass += "1\tTörökugrató utca\t1\n"
    result_from_overpass += "1\tTörökugrató utca\t2\n"
    result_from_overpass += "1\tTűzkő utca\t9\n"
    result_from_overpass += "1\tTűzkő utca\t10\n"
    result_from_overpass += "1\tOSM Name 1\t1\n"
    result_from_overpass += "1\tOSM Name 1\t2\n"
    result_from_overpass += "1\tOnly In OSM utca\t1\n"
    expected = util.get_content(relations.get_workdir(), "street-housenumbers-gazdagret.csv")
    relation = relations.get_relation(relation_name)
    relation.get_files().write_osm_housenumbers(result_from_overpass)
    actual = util.get_content(relations.get_workdir(), "street-housenumbers-gazdagret.csv")
    self.assertEqual(actual, expected)

def find_answers_by_question_url(self, question_url, count_id):
    content = get_content(question_url, count_id)
    if content == 'Fail':
        return 0
    content = BeautifulSoup(content, 'lxml')
    question_name = content.find(name='div', id='zh-question-title')
    if not question_name:
        return 0
    question_name = question_name.find('h2').get_text().strip()
    answer_num = content.find(name='h3', id='zh-question-answer-num')
    if not answer_num:
        return 0
    answer_num = answer_num.get('data-num')
    get_answer_num = 10
    if 0 < int(answer_num) <= get_answer_num and int(answer_num) > 0:
        get_answer_num = answer_num
    if answer_num <= 0:
        return 0
    question_list = []
    answer_detail_list = []
    time_now = int(time.time())
    question_id = re.sub('.*/', '', question_url)
    question_list = question_list + [(question_name, int(question_id), 0, 0, 0, time_now, 0)]
    answers = content.findAll(name='div', attrs={'class': 'zm-item-answer'}, limit=get_answer_num)
    for answer in answers:
        answer_author_info = answer.find(name='h3', attrs={'class': 'zm-item-answer-author-wrap'}).findAll('a')
        if not answer_author_info:
            continue
        answer_author_id = answer_author_info[1].get('href').replace('/people/', '')
        answer_author_name = answer_author_info[1].get_text()
        answer_detail = answer.find(name='div', attrs={'class': ' zm-editable-content clearfix'})
        answer_detail = str(answer_detail).replace('<div class=" zm-editable-content clearfix">', '').replace('</div>', '').strip()
        if not answer_detail:
            continue
        # append list
        answer_detail_list = answer_detail_list + [(answer_author_id, answer_author_name, int(question_id), answer_detail)]
    # insert data to DB
    question_sql = 'insert ignore into question (name, link_id, focus, answer, last_visit, add_time, top_answer_number) ' \
                   'values (%s, %s, %s, %s, %s, %s, %s)'
    question__detail_sql = 'insert ignore into answer_detail (answer_author_id, answer_author_name, question_link_id, answer_detail)' \
                           ' values (%s, %s, %s, %s)'
    self.cursor.executemany(question_sql, question_list)
    self.cursor.executemany(question__detail_sql, answer_detail_list)
    self.db.commit()
    return self.cursor.rowcount

def convert():
    if util.validate_request_subreddit(request.form) is not None:
        return jsonify(type='danger', text=util.validate_request_subreddit(request.form))
    subreddit = request.form['subreddit']
    include_comments = request.form['comments']
    time = request.form['time']
    limit = int(request.form['limit'])
    address = request.form['email']
    kindle_address = request.form['kindle_address']
    try:
        posts = util.get_posts(subreddit, time, limit)
        if time == 'all':
            title = 'Top ' + str(limit) + ' posts from /r/' + subreddit + ' ever'
        else:
            title = 'Top ' + str(limit) + ' posts from /r/' + subreddit + ' over the past ' + time
        top = []
        for post in posts:
            author = '[deleted]' if post.author is None else post.author.name
            comments = None
            if include_comments == 'true':
                post.comments.replace_more(limit=0)
                comments = util.get_comments(post, request.form['comments_style'], author)
            try:
                top.append({
                    'title': post.title,
                    'body': util.get_content(post.url) if not post.url.startswith('https://www.reddit.com/r/') else util.markdown(post.selftext),
                    'author': author,
                    'comments': comments
                })
            except:
                pass
    except:
        return jsonify(type='danger', text='That ain\'t no subreddit I\'ve ever heard of!')
    attachment = render_template('posts.html', posts=top, title=title)
    status = util.send_email(address, kindle_address, attachment, title)
    if status is None:
        return jsonify(type='success', text='Success!')
    else:
        return jsonify(type='warning', text='Uh oh! Something went wrong on our end')

def test_happy(self) -> None:
    """Tests the happy path."""
    mock_overpass_sleep_called = False

    def mock_overpass_sleep() -> None:
        nonlocal mock_overpass_sleep_called
        mock_overpass_sleep_called = True

    result_from_overpass = "******"
    result_from_overpass += "1\tTörökugrató utca\t1\n"
    result_from_overpass += "1\tTörökugrató utca\t2\n"
    result_from_overpass += "1\tTűzkő utca\t9\n"
    result_from_overpass += "1\tTűzkő utca\t10\n"
    result_from_overpass += "1\tOSM Name 1\t1\n"
    result_from_overpass += "1\tOSM Name 1\t2\n"
    result_from_overpass += "1\tOnly In OSM utca\t1\n"

    def mock_urlopen(_url: str, _data: Optional[bytes] = None) -> BinaryIO:
        buf = io.BytesIO()
        buf.write(result_from_overpass.encode('utf-8'))
        buf.seek(0)
        return buf

    with unittest.mock.patch('util.get_abspath', get_abspath):
        with unittest.mock.patch("cron.overpass_sleep", mock_overpass_sleep):
            with unittest.mock.patch('urllib.request.urlopen', mock_urlopen):
                relations = get_relations()
                for relation_name in relations.get_active_names():
                    if relation_name != "gazdagret":
                        relations.get_relation(relation_name).get_config().set_active(False)
                expected = util.get_content(relations.get_workdir(),
                                            "street-housenumbers-gazdagret.csv")
                os.unlink(os.path.join(relations.get_workdir(),
                                       "street-housenumbers-gazdagret.csv"))
                cron.update_osm_housenumbers(relations)
                self.assertTrue(mock_overpass_sleep_called)
                actual = util.get_content(relations.get_workdir(),
                                          "street-housenumbers-gazdagret.csv")
                self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    relations = get_relations()
    for relation_name in relations.get_active_names():
        if relation_name not in ("gazdagret", "ujbuda"):
            relations.get_relation(relation_name).get_config().set_active(False)
    path = os.path.join(relations.get_workdir(), "street-housenumbers-reference-gazdagret.lst")
    expected = util.get_content(path)
    os.unlink(path)
    cron.update_ref_housenumbers(relations, update=True)
    mtime = os.path.getmtime(path)
    cron.update_ref_housenumbers(relations, update=False)
    self.assertEqual(os.path.getmtime(path), mtime)
    actual = util.get_content(path)
    self.assertEqual(actual, expected)
    # Make sure housenumber ref is not created for the streets=only case.
    ujbuda_path = os.path.join(relations.get_workdir(), "street-housenumbers-reference-ujbuda.lst")
    self.assertFalse(os.path.exists(ujbuda_path))

def test_happy(self) -> None:
    """Tests the happy path."""
    with unittest.mock.patch('config.get_abspath', get_abspath):
        relations = get_relations()
        for relation_name in relations.get_active_names():
            # ujbuda is streets=only
            if relation_name not in ("gazdagret", "ujbuda"):
                relations.get_relation(relation_name).get_config().set_active(False)
        path = os.path.join(relations.get_workdir(), "gazdagret.percent")
        expected = util.get_content(path)
        os.unlink(path)
        cron.update_missing_housenumbers(relations, update=True)
        mtime = os.path.getmtime(path)
        cron.update_missing_housenumbers(relations, update=False)
        self.assertEqual(os.path.getmtime(path), mtime)
        actual = util.get_content(path)
        self.assertEqual(actual, expected)
        # Make sure housenumber stat is not created for the streets=only case.
        self.assertFalse(os.path.exists(os.path.join(relations.get_workdir(), "ujbuda.percent")))

def test_happy(self) -> None:
    """Tests the happy path."""
    relations = get_relations()
    relation_name = "gazdagret"
    relation = relations.get_relation(relation_name)
    expected = util.get_content(relations.get_workdir(), "gazdagret.percent")
    ret = relation.write_missing_housenumbers()
    todo_street_count, todo_count, done_count, percent, table = ret
    self.assertEqual(todo_street_count, 3)
    self.assertEqual(todo_count, 5)
    self.assertEqual(done_count, 6)
    self.assertEqual(percent, '54.55')
    string_table = table_doc_to_string(table)
    self.assertEqual(string_table, [['Street name', 'Missing count', 'House numbers'],
                                    ['Törökugrató utca', '2', '7<br />10'],
                                    ['Tűzkő utca', '2', '1<br />2'],
                                    ['Hamzsabégi út', '1', '1']])
    actual = util.get_content(relations.get_workdir(), "gazdagret.percent")
    self.assertEqual(actual, expected)

def test_happy(self) -> None:
    """Tests the happy path."""
    with unittest.mock.patch('config.get_abspath', get_abspath):
        relations = get_relations()
        for relation_name in relations.get_active_names():
            # gellerthegy is streets=no
            if relation_name not in ("gazdagret", "gellerthegy"):
                relations.get_relation(relation_name).get_config().set_active(False)
        path = os.path.join(relations.get_workdir(), "streets-reference-gazdagret.lst")
        expected = util.get_content(path)
        os.unlink(path)
        cron.update_ref_streets(relations, update=True)
        mtime = os.path.getmtime(path)
        cron.update_ref_streets(relations, update=False)
        self.assertEqual(os.path.getmtime(path), mtime)
        actual = util.get_content(path)
        self.assertEqual(actual, expected)
        # Make sure street ref is not created for the streets=no case.
        ujbuda_path = os.path.join(relations.get_workdir(), "streets-reference-gellerthegy.lst")
        self.assertFalse(os.path.exists(ujbuda_path))

def find_question_by_link(self, topic_url, count_id):
    content = get_content(topic_url, count_id)
    if content == "FAIL":
        return 0
    soup = BeautifulSoup(content, 'lxml')
    question_links = soup.findAll('a', attrs={'class': 'question_link'})
    rowcount = 0
    for question_link in question_links:
        # get question id and name
        question_link = question_link.get('href')
        if question_link:
            question_url = 'http://www.zhihu.com' + question_link
            rowcount += self.find_answers_by_question_url(question_url, count_id)
    return rowcount

def find_question_by_link(self, topic_url, count_id):
    content = get_content(topic_url, count_id)
    if content == "FAIL":
        return 0
    soup = BeautifulSoup(content)
    questions = soup.findAll('div', attrs={'class': 'feed-item'})
    i = 0
    p_str = 'INSERT IGNORE INTO QUESTION (NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER, ACTIVATE, REVIEW, FIRST_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    anser_list = []
    time_now = int(time.time())
    for question in questions:
        # Do not add questions that are more than 5 days old
        quesTag = question.find('span', attrs={'class': 'time'})
        if quesTag is not None:
            quesTime = quesTag.get_text()
        else:
            break
        m = re.search(u'周|月|年', quesTime)
        if m is not None:
            break
        quesLink = question.find('a', attrs={'class': 'question_link'})
        tem_text = quesLink.get_text()
        tem_id = quesLink.get('href')
        tem_id = tem_id.replace('/question/', '')
        anser_list = anser_list + [(tem_text, int(tem_id), 0, 0, 0, time_now, 0, 0, 0, 0)]
    self.cursor.executemany(p_str, anser_list)
    return len(anser_list)

from itertools import permutations

from util import is_prime, get_content


def solve(words):
    max_length = max([len(word) for word in words])
    max_value = 26 * max_length
    trians = []
    for i in xrange(1, max_value):
        n = i * (i + 1) / 2
        if n > max_value:
            break
        trians.append(n)
    print trians
    c = 0
    for word in words:
        if sum([ord(w) - 64 for w in word]) in trians:
            c += 1
    return c


if __name__ == "__main__":
    import cProfile
    text = get_content("http://projecteuler.net/project/words.txt")
    words = [word.replace('"', '').strip() for word in text.split(',')]
    cProfile.run("print solve(words)")

from util import get_content


def score_name(name):
    return sum([ord(c) - 64 for c in name])


def solve(names):
    s = 0
    for i, name in enumerate(names):
        s += sum([ord(c) - 64 for c in name]) * (i + 1)
    return s


if __name__ == "__main__":
    import cProfile
    text = get_content("http://projecteuler.net/project/names.txt")
    names = [name.replace('"', '').strip() for name in text.split(',')]
    names.sort()
    cProfile.run('print solve(names)')

def update(self, link_id, count_id):
    time_now = int(time.time())
    questionUrl = 'http://www.zhihu.com/question/' + link_id
    content = get_content(questionUrl, count_id)
    if content == "FAIL":
        sql = "UPDATE QUESTION SET LAST_VISIT = %s WHERE LINK_ID = %s"
        self.cursor.execute(sql, (time_now, link_id))
        return
    soup = BeautifulSoup(content)
    # There are 3 numbers in this format:
    # Focus, Last Activated and Review
    numbers = soup.findAll('div', attrs={'class': 'zg-gray-normal'})
    if len(numbers) != 3:
        print "LINK_ID:" + link_id + " does not have 3 numbers"
        return
    focus = numbers[0]
    activate = numbers[1]
    review = numbers[2]
    # Find out how many people focus on this question.
    m = re.search(r'<strong>(.*?)</strong>', str(focus))
    if m == None:
        focus_amount = '0'
    else:
        focus_amount = m.group(1)
    # Find out when this question was last activated.
    m = re.search(r'>(.*?)<', str(activate))
    if m == None:
        activate_time = u'Unknown'
    else:
        activate_time = get_time(m.group(1))
    # Find out how many people reviewed this question.
    m = re.search(r'<strong>(.*?)</strong>', str(review))
    if m == None:
        review_amount = '0'
    else:
        review_amount = m.group(1)
    # Find out how many people answered this question.
    answer_amount = soup.find('h3', attrs={'id': 'zh-question-answer-num'})
    if answer_amount != None:
        answer_amount = answer_amount.get_text().replace(u' 个回答', '')
    else:
        answer_amount = soup.find('div', attrs={'class': 'zm-item-answer'})
        if answer_amount != None:
            answer_amount = u'1'
        else:
            answer_amount = u'0'
    # Find out the top answer's vote amount.
    top_answer = soup.findAll('span', attrs={'class': 'count'})
    if top_answer == []:
        top_answer_votes = 0
    else:
        top_answer_votes = 0
        for t in top_answer:
            t = t.get_text()
            t = t.replace('K', '000')
            t = int(t)
            if t > top_answer_votes:
                top_answer_votes = t
    # Find out the first comment date.
    comment_dates = soup.findAll('a', class_="answer-date-link")
    if comment_dates == []:
        first_comment_time = 0
    else:
        times = map(get_time, comment_dates)
        first_comment_time = min(times)
    # Print it to check if everything is good.
    if count_id % 1 == 0:
        print str(count_id) + " , " + self.getName() + " Update QUESTION set FOCUS = " + focus_amount + " , ANSWER = " + answer_amount + ", LAST_VISIT = " + str(time_now) + ", TOP_ANSWER_NUMBER = " + str(top_answer_votes) + " where LINK_ID = " + link_id
    # Update this question.
    sql = "UPDATE QUESTION SET FOCUS = %s , ANSWER = %s, LAST_VISIT = %s, TOP_ANSWER_NUMBER = %s , ACTIVATE = %s, REVIEW = %s , FIRST_COMMENT = %s WHERE LINK_ID = %s"
    self.cursor.execute(sql, (focus_amount, answer_amount, time_now, top_answer_votes, activate_time, review_amount, first_comment_time, link_id))
    # Find out the topics related to this question.
    topics = soup.findAll('a', attrs={'class': 'zm-item-tag'})
    sql_str = "INSERT IGNORE INTO TOPIC (NAME, LAST_VISIT, LINK_ID, ADD_TIME, PRIORITY) VALUES (%s, %s, %s, %s, %s)"
    topicList = []
    for topic in topics:
        topicName = topic.get_text().replace('\n', '')
        topicUrl = topic.get('href').replace('/topic/', '')
        topicList = topicList + [(topicName, 0, topicUrl, time_now, 0)]
    self.cursor.executemany(sql_str, topicList)

def update(self, link_id, count_id):
    time_now = int(time.time())
    questionUrl = 'http://www.zhihu.com/question/' + link_id
    content = get_content(questionUrl, count_id)
    if content == "FAIL":
        sql = "UPDATE QUESTION SET LAST_VISIT = %s WHERE LINK_ID = %s"
        self.cursor.execute(sql, (time_now, link_id))
        return
    soup = BeautifulSoup(content)
    questions = soup.find('div', attrs={'class': 'zg-gray-normal'})
    # Find out how many people focus on this question.
    if questions == None:
        return
    else:
        focus_amount = questions.getText().replace('\n', '')
        focus_amount = focus_amount.replace(u'人关注该问题', '')
        focus_amount = focus_amount.replace(u'关注', '')
        if focus_amount == u'问题还没有':
            focus_amount = u'0'
        focus_amount = focus_amount.replace(u'问题', '')
        if focus_amount == u'\\xe8\\xbf\\x98\\xe6\\xb2\\xa1\\xe6\\x9c\\x89':
            # This is a special case.
            return
    # Find out how many people answered this question.
    answer_amount = soup.find('h3', attrs={'id': 'zh-question-answer-num'})
    if answer_amount != None:
        answer_amount = answer_amount.getText().replace(u' 个回答', '')
    else:
        answer_amount = soup.find('div', attrs={'class': 'zm-item-answer'})
        if answer_amount != None:
            answer_amount = u'1'
        else:
            answer_amount = u'0'
    # Find out the top answer's vote amount.
    top_answer = soup.findAll('span', attrs={'class': 'count'})
    if top_answer == []:
        top_answer_votes = 0
    else:
        top_answer_votes = 0
        for t in top_answer:
            t = t.getText()
            t = t.replace('K', '000')
            t = int(t)
            if t > top_answer_votes:
                top_answer_votes = t
    # Print it to check if everything is good.
    if count_id % 1 == 0:
        print str(count_id) + " , " + self.getName() + " Update QUESTION set FOCUS = " + focus_amount + " , ANSWER = " + answer_amount + ", LAST_VISIT = " + str(time_now) + ", TOP_ANSWER_NUMBER = " + str(top_answer_votes) + " where LINK_ID = " + link_id
    # Update this question.
    sql = "UPDATE QUESTION SET FOCUS = %s , ANSWER = %s, LAST_VISIT = %s, TOP_ANSWER_NUMBER = %s WHERE LINK_ID = %s"
    self.cursor.execute(sql, (focus_amount, answer_amount, time_now, top_answer_votes, link_id))
    # Find out the topics related to this question.
    topics = soup.findAll('a', attrs={'class': 'zm-item-tag'})
    if questions != None:
        sql_str = "INSERT IGNORE INTO TOPIC (NAME, LAST_VISIT, LINK_ID, ADD_TIME) VALUES (%s, %s, %s, %s)"
        topicList = []
        for topic in topics:
            topicName = topic.getText().replace('\n', '')
            topicUrl = topic.get('href').replace('/topic/', '')
            topicList = topicList + [(topicName, 0, topicUrl, time_now)]
        self.cursor.executemany(sql_str, topicList)

            continue
        random_recipients = []
        all_recipients = eggz.get('recipients')
        for i in range(0, SEND_LIMIT_PER_USER):
            random_recipients.append(random.choice(all_recipients))
        for recipient in random_recipients:
            c.send_mail(smtp, recipient)
        smtp.close()


if __name__ == "__main__":
    queue = Queue()
    pool = Pool(PROCESSES, worker, (queue,))
    credentials = get_credentials()
    subject = get_subject()
    content = get_content()
    recipients = get_recipients()
    attachments = get_attachments()
    for x in range(0, HOW_MANY_LOGINS):
        credential = random.choice(credentials)
        args = {'smtp_server': credential.get('smtp_server'),
                'port': credential.get('port'),
                'username': credential.get('username'),
                'password': credential.get('password'),
                'ssl': credential.get('ssl'),
                'starttls': credential.get('starttls'),
                'subject': subject,
                'content': content,
                'recipients': recipients,
                'attachments': attachments}