Example #1
0
    def test_happy(self) -> None:
        """Tests the happy path: updating OSM streets regenerates the CSV.

        Patches out the overpass sleep and the network fetch, deletes the
        streets CSV, and checks that update=True recreates it while a second
        run with update=False leaves it untouched (same mtime).
        """
        # Flag flipped by the sleep mock so we can assert it was called.
        mock_overpass_sleep_called = False

        def mock_overpass_sleep() -> None:
            nonlocal mock_overpass_sleep_called
            mock_overpass_sleep_called = True

        result_from_overpass = "******"

        def mock_urlopen(_url: str, _data: Optional[bytes] = None) -> BinaryIO:
            # Serve the canned overpass payload instead of hitting the network.
            buf = io.BytesIO()
            buf.write(result_from_overpass.encode('utf-8'))
            buf.seek(0)
            return buf

        with unittest.mock.patch("cron.overpass_sleep", mock_overpass_sleep):
            with unittest.mock.patch('urllib.request.urlopen', mock_urlopen):
                relations = get_relations()
                # Keep only the gazdagret relation active to limit the scope.
                for relation_name in relations.get_active_names():
                    if relation_name != "gazdagret":
                        relations.get_relation(relation_name).get_config().set_active(False)
                expected = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
                path = os.path.join(relations.get_workdir(), "streets-gazdagret.csv")
                os.unlink(path)
                cron.update_osm_streets(relations, update=True)
                mtime = os.path.getmtime(path)
                # A second run without update must not rewrite the file.
                cron.update_osm_streets(relations, update=False)
                self.assertEqual(os.path.getmtime(path), mtime)
                self.assertTrue(mock_overpass_sleep_called)
                actual = util.get_content(relations.get_workdir(), "streets-gazdagret.csv")
                self.assertEqual(actual, expected)
Example #2
0
 def test_http_error(self) -> None:
     """Tests the case when we keep getting HTTP errors.

     The status route succeeds, but the interpreter route has an empty
     result_path, so every overpass query attempt fails; the streets CSV
     must stay unchanged after the update.
     """
     ctx = test_context.make_test_context()
     routes: List[test_context.URLRoute] = [
         test_context.URLRoute(
             url="https://overpass-api.de/api/status",
             data_path="",
             result_path="tests/network/overpass-status-happy.txt"),
         test_context.URLRoute(
             url="https://overpass-api.de/api/interpreter",
             data_path="",
             result_path=""),
     ]
     network = test_context.TestNetwork(routes)
     ctx.set_network(network)
     relations = areas.Relations(ctx)
     # Keep only the gazdagret relation active to limit the scope.
     for relation_name in relations.get_active_names():
         if relation_name != "gazdagret":
             relations.get_relation(relation_name).get_config().set_active(
                 False)
     expected = util.get_content(relations.get_workdir(),
                                 "streets-gazdagret.csv")
     cron.update_osm_streets(ctx, relations, update=True)
     # Make sure that in case we keep getting errors we give up at some stage and
     # leave the last state unchanged.
     actual = util.get_content(relations.get_workdir(),
                               "streets-gazdagret.csv")
     self.assertEqual(actual, expected)
Example #3
0
 def test_happy(self) -> None:
     """Tests the happy path: updating OSM streets regenerates the CSV.

     Both canned overpass routes succeed.  The streets CSV is deleted,
     update=True has to recreate it, and update=False must leave the
     mtime unchanged.
     """
     ctx = test_context.make_test_context()
     # Both routes succeed: the status check, then the street query itself.
     routes: List[test_context.URLRoute] = [
         test_context.URLRoute(
             url="https://overpass-api.de/api/status",
             data_path="",
             result_path="tests/network/overpass-status-happy.txt"),
         test_context.URLRoute(
             url="https://overpass-api.de/api/interpreter",
             data_path="",
             result_path="tests/network/overpass-streets-gazdagret.csv"),
     ]
     network = test_context.TestNetwork(routes)
     ctx.set_network(network)
     relations = areas.Relations(ctx)
     # Keep only the gazdagret relation active to limit the scope.
     for relation_name in relations.get_active_names():
         if relation_name != "gazdagret":
             relations.get_relation(relation_name).get_config().set_active(
                 False)
     expected = util.get_content(relations.get_workdir(),
                                 "streets-gazdagret.csv")
     path = os.path.join(relations.get_workdir(), "streets-gazdagret.csv")
     os.unlink(path)
     cron.update_osm_streets(ctx, relations, update=True)
     mtime = os.path.getmtime(path)
     # A second run without update must not rewrite the file.
     cron.update_osm_streets(ctx, relations, update=False)
     self.assertEqual(os.path.getmtime(path), mtime)
     actual = util.get_content(relations.get_workdir(),
                               "streets-gazdagret.csv")
     self.assertEqual(actual, expected)
Example #4
0
 def test_happy(self) -> None:
     """Tests the happy path: reference housenumbers are regenerated.

     Deletes the gazdagret reference list, runs the cron update and checks
     it was reproduced; also verifies that no list is created for the
     streets-only ujbuda relation.
     """
     with unittest.mock.patch('util.get_abspath', get_abspath):
         relations = get_relations()
         # Keep gazdagret (full case) and ujbuda (streets=only case) active.
         for relation_name in relations.get_active_names():
             if relation_name not in ("gazdagret", "ujbuda"):
                 relations.get_relation(
                     relation_name).get_config().set_active(False)
         config = webframe.get_config()
         expected = util.get_content(
             relations.get_workdir(),
             "street-housenumbers-reference-gazdagret.lst")
         # Delete the output so the cron job has to recreate it.
         os.unlink(
             os.path.join(relations.get_workdir(),
                          "street-housenumbers-reference-gazdagret.lst"))
         cron.update_ref_housenumbers(relations, config)
         actual = util.get_content(
             relations.get_workdir(),
             "street-housenumbers-reference-gazdagret.lst")
         self.assertEqual(actual, expected)
         # Make sure housenumber ref is not created for the streets=only case.
         ujbuda_path = os.path.join(
             relations.get_workdir(),
             "street-housenumbers-reference-ujbuda.lst")
         self.assertFalse(os.path.exists(ujbuda_path))
Example #5
0
    def test_happy(self) -> None:
        """Tests the happy path: update_stats() writes today's CSV.

        Uses canned overpass responses, verifies the fresh CSV matches the
        fixture, that a stale CSV is garbage-collected, and that the
        reference count ends up with the expected value.
        """
        ctx = test_context.make_test_context()
        ctx.set_time(test_context.make_test_time())
        # Both routes succeed: the status check, then the stats query itself.
        routes: List[test_context.URLRoute] = [
            test_context.URLRoute(
                url="https://overpass-api.de/api/status",
                data_path="",
                result_path="tests/network/overpass-status-happy.txt"),
            test_context.URLRoute(
                url="https://overpass-api.de/api/interpreter",
                data_path="",
                result_path="tests/network/overpass-stats.csv"),
        ]
        network = test_context.TestNetwork(routes)
        ctx.set_network(network)

        # Create a CSV that is definitely old enough to be removed.
        old_path = ctx.get_abspath("workdir/stats/old.csv")
        create_old_file(old_path)

        today = time.strftime("%Y-%m-%d")
        path = ctx.get_abspath("workdir/stats/%s.csv" % today)
        cron.update_stats(ctx, overpass=True)
        actual = util.get_content(path)
        self.assertEqual(actual,
                         util.get_content("tests/network/overpass-stats.csv"))

        # Make sure that the old CSV is removed.
        self.assertFalse(os.path.exists(old_path))

        # update_stats() also refreshes the reference count; verify its value.
        with open(ctx.get_abspath("workdir/stats/ref.count"), "r") as stream:
            num_ref = int(stream.read().strip())
        self.assertEqual(num_ref, 300)
Example #6
0
 def test_happy(self) -> None:
     """Tests the happy path for the additional streets count.

     Deletes the gazdagret count file, checks that update=True recreates
     it with the expected value and that update=False leaves the mtime
     alone; also checks no count is written for the streets=no case.
     """
     relations = get_relations()
     for relation_name in relations.get_active_names():
         # gellerthegy is streets=no
         if relation_name not in ("gazdagret", "gellerthegy"):
             relations.get_relation(relation_name).get_config().set_active(
                 False)
     path = os.path.join(relations.get_workdir(),
                         "gazdagret-additional-streets.count")
     expected = "1"
     if os.path.exists(path):
         # Fixed: the old code also called util.get_content(path) here and
         # discarded the result; only the unlink is needed.
         os.unlink(path)
     cron.update_additional_streets(relations, update=True)
     mtime = os.path.getmtime(path)
     cron.update_additional_streets(relations, update=False)
     self.assertEqual(os.path.getmtime(path), mtime)
     actual = util.get_content(path)
     self.assertEqual(actual, expected)
     # Make sure street stat is not created for the streets=no case.
     self.assertFalse(
         os.path.exists(
             os.path.join(relations.get_workdir(),
                          "gellerthegy-additional-streets.count")))
Example #7
0
def handle_static(
        request_uri: str) -> Tuple[bytes, str, List[Tuple[str, str]]]:
    """Handles serving static content.

    Maps the URI suffix to a content type and the directory the file is
    served from; unknown suffixes yield empty content.
    """
    path = request_uri.split("/")[-1]
    extra_headers: List[Tuple[str, str]] = []

    # Suffix -> (content type, lazily computed source directory).
    handlers = [
        (".js", "application/x-javascript",
         lambda: config.Config.get_workdir()),
        (".css", "text/css",
         lambda: config.get_abspath("static")),
        (".json", "application/json",
         lambda: os.path.join(config.Config.get_workdir(), "stats")),
        (".ico", "image/x-icon",
         lambda: config.get_abspath("")),
    ]
    for suffix, content_type, get_directory in handlers:
        if request_uri.endswith(suffix):
            content = util.get_content(get_directory(), path, extra_headers)
            return content, content_type, extra_headers

    return bytes(), "", extra_headers
Example #8
0
    def test_http_error(self) -> None:
        """Tests the case when we keep getting HTTP errors.

        urlopen always raises, simulating a persistently failing overpass;
        the streets CSV must stay unchanged after the update attempt.
        """
        # Flag flipped by the sleep mock so we can assert it was called.
        mock_overpass_sleep_called = False

        def mock_overpass_sleep() -> None:
            nonlocal mock_overpass_sleep_called
            mock_overpass_sleep_called = True

        with unittest.mock.patch("cron.overpass_sleep", mock_overpass_sleep):
            with unittest.mock.patch('urllib.request.urlopen',
                                     mock_urlopen_raise_error):
                relations = get_relations()
                # Keep only the gazdagret relation active to limit the scope.
                for relation_name in relations.get_active_names():
                    if relation_name != "gazdagret":
                        relations.get_relation(
                            relation_name).get_config().set_active(False)
                expected = util.get_content(relations.get_workdir(),
                                            "streets-gazdagret.csv")
                cron.update_osm_streets(relations, update=True)
                self.assertTrue(mock_overpass_sleep_called)
                # Make sure that in case we keep getting errors we give up at some stage and
                # leave the last state unchanged.
                actual = util.get_content(relations.get_workdir(),
                                          "streets-gazdagret.csv")
                self.assertEqual(actual, expected)
Example #9
0
 def test_happy(self) -> None:
     """Writing the reference street list reproduces the stored fixture."""
     reference_path = config.get_abspath(os.path.join("refdir", "utcak_20190514.tsv"))
     relations = get_relations()
     gazdagret = relations.get_relation("gazdagret")
     workdir = relations.get_workdir()
     expected = util.get_content(workdir, "streets-reference-gazdagret.lst")
     gazdagret.write_ref_streets(reference_path)
     self.assertEqual(util.get_content(workdir, "streets-reference-gazdagret.lst"), expected)
Example #10
0
 def test_happy(self) -> None:
     """Writing the overpass result stores it as the relation's street CSV."""
     relations = get_relations()
     gazdagret = relations.get_relation("gazdagret")
     workdir = relations.get_workdir()
     overpass_result = "******"
     expected = util.get_content(workdir, "streets-gazdagret.csv")
     gazdagret.get_files().write_osm_streets(overpass_result)
     self.assertEqual(util.get_content(workdir, "streets-gazdagret.csv"), expected)
    def test_happy(self) -> None:
        """Running main() reproduces the stored reference housenumber list."""
        lst_path = config.get_abspath("workdir/street-housenumbers-reference-gazdagret.lst")
        expected = util.get_content(lst_path)

        # Run the tool as if invoked from the command line.
        with unittest.mock.patch('sys.argv', ["", "gazdagret"]):
            get_reference_housenumbers.main()

        self.assertEqual(util.get_content(lst_path), expected)
Example #12
0
 def test_happy(self) -> None:
     """Writing reference housenumbers reproduces the stored fixture."""
     refdir = os.path.join(os.path.dirname(__file__), "refdir")
     reference_paths = [
         os.path.join(refdir, "hazszamok_20190511.tsv"),
         os.path.join(refdir, "hazszamok_kieg_20190808.tsv"),
     ]
     relations = get_relations()
     workdir = relations.get_workdir()
     expected = util.get_content(workdir, "street-housenumbers-reference-gazdagret.lst")
     relations.get_relation("gazdagret").write_ref_housenumbers(reference_paths)
     self.assertEqual(util.get_content(workdir, "street-housenumbers-reference-gazdagret.lst"), expected)
Example #13
0
 def test_happy(self) -> None:
     """write_missing_streets() reports counts and rewrites the percent file."""
     relations = get_relations()
     relation = relations.get_relation("gazdagret")
     workdir = relations.get_workdir()
     expected = util.get_content(workdir, "gazdagret-streets.percent")
     todo_count, done_count, percent, streets = relation.write_missing_streets()
     self.assertEqual(todo_count, 1)
     self.assertEqual(done_count, 4)
     self.assertEqual(percent, '80.00')
     self.assertEqual(streets, ['Only In Ref utca'])
     self.assertEqual(util.get_content(workdir, "gazdagret-streets.percent"), expected)
Example #14
0
 def test_happy(self) -> None:
     """Tests the happy path: missing-streets percent is regenerated.

     Deletes the percent file, runs the cron update and checks it was
     reproduced; also verifies no stat is written for the streets=no case.
     """
     # Redirect path resolution to the test directory.
     with unittest.mock.patch('util.get_abspath', get_abspath):
         relations = get_relations()
         for relation_name in relations.get_active_names():
             # gellerthegy is streets=no
             if relation_name not in ("gazdagret", "gellerthegy"):
                 relations.get_relation(relation_name).get_config().set_active(False)
         expected = util.get_content(relations.get_workdir(), "gazdagret-streets.percent")
         # Delete the output so the cron job has to recreate it.
         os.unlink(os.path.join(relations.get_workdir(), "gazdagret-streets.percent"))
         cron.update_missing_streets(relations)
         actual = util.get_content(relations.get_workdir(), "gazdagret-streets.percent")
         self.assertEqual(actual, expected)
         # Make sure street stat is not created for the streets=no case.
         self.assertFalse(os.path.exists(os.path.join(relations.get_workdir(), "gellerthegy-streets.percent")))
    def test_happy(self) -> None:
        """Tests the happy path: main() reproduces the reference list."""
        def get_abspath(path: str) -> str:
            # Resolve relative paths against the test directory; absolute
            # paths pass through unchanged.
            if os.path.isabs(path):
                return path
            return os.path.join(os.path.dirname(__file__), path)
        with unittest.mock.patch('config.get_abspath', get_abspath):
            expected = util.get_content(get_abspath("workdir/street-housenumbers-reference-gazdagret.lst"))

            # Run the tool as if invoked from the command line.
            argv = ["", "gazdagret"]
            with unittest.mock.patch('sys.argv', argv):
                get_reference_housenumbers.main()

            # The tool must reproduce the previous content exactly.
            actual = util.get_content(get_abspath("workdir/street-housenumbers-reference-gazdagret.lst"))
            self.assertEqual(actual, expected)
Example #16
0
def thread():
    """Converts a single reddit submission to HTML and mails it to a Kindle.

    Reads the submission URL, target addresses and comment options from the
    POSTed form; returns a JSON status payload for the frontend.
    """
    # Validate once and reuse the result instead of calling the validator
    # twice (the old code re-ran it to build the error message).
    validation_error = util.validate_request_post(request.form)
    if validation_error is not None:
        return jsonify(type='danger', text=validation_error)

    try:
        submission = util.r.submission(url=request.form['submission'])
    except Exception:
        # Narrowed from a bare except: a bare clause would also swallow
        # KeyboardInterrupt/SystemExit.
        return jsonify(type='danger', text='That wasn\'t a reddit link, was it?')

    if not submission.url.startswith('https://www.reddit.com/r/'):
        # Link post: fetch the linked page as the body.
        body = util.get_content(submission.url)
    else:
        # Self post: render its markdown.
        body = util.markdown(submission.selftext, output_format='html5')
    title = submission.title
    author = "[deleted]"
    if submission.author is not None:
        author = submission.author.name
    address = request.form['email']
    kindle_address = request.form['kindle_address']

    comments = None
    if request.form['comments'] == 'true':
        submission.comments.replace_more(limit=0)
        comments = util.get_comments(submission, request.form['comments_style'], author)

    attachment = render_template('comments.html', title=title, body=body, author=author,
                                 comments=comments)

    status = util.send_email(address, kindle_address, attachment, title)

    if status is None:
        return jsonify(type='success', text='Success!')
    else:
        return jsonify(type='warning', text='Uh oh! Something went wrong on our end')
Example #17
0
    def find_question_by_link(self, topic_url, count_id):
        """Scrapes the question feed of a topic page and bulk-inserts the
        questions into the QUESTION table.

        Returns the number of rows affected by the insert, or 0 when the
        page could not be fetched.
        """
        content = get_content(topic_url, count_id)

        if content in ["FAIL", "NO FOUND"]:
            return 0

        soup = BeautifulSoup(content)
        questions = soup.findAll(
            'div', attrs={'class': 'feed-item feed-item-hook question-item'})
        p_str = 'INSERT IGNORE INTO QUESTION (TOPIC_URL, NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)'
        answer_rows = []
        time_now = int(time.time())

        for question in questions:
            temp_question = question.find('a',
                                          attrs={'class': 'question_link'})
            answer_amount = question.find('meta',
                                          attrs={'itemprop': 'answerCount'})
            answer_amount = answer_amount.get('content')
            tem_text = temp_question.get_text()
            tem_id = temp_question.get('href')
            tem_id = tem_id.replace('/question/', '')

            # append is O(1); the old `list + [row]` rebuilt the list on
            # every iteration (quadratic).  Also dropped the unused `i`.
            answer_rows.append((topic_url, tem_text, int(tem_id), 0,
                                int(answer_amount), 0, time_now, 0))

        self.cursor.executemany(p_str, answer_rows)

        return self.cursor.rowcount
Example #18
0
    def getUniversityStudent(self, inputid):
        """Fetches one user's profile page and records them if their school
        info matches self.school.

        Appends (name, 0, inputid, timestamp, sex, hometown) to
        self.nameList on a match.  Removed an unused local `nameList` and a
        large commented-out copy of the same logic.
        """
        time_now = int(time.time())
        inputUrl = self.homepage + inputid + self.infopage
        tmpContent = get_content(inputUrl)
        soup = bs(tmpContent.text)
        # presumably rate-limiting between profile fetches — confirm.
        time.sleep(1)
        divlabel = soup.find_all('div', 'tip')

        self.personalInfo = divlabel[0].next_sibling.get_text('|', strip=True)
        self.schoolInfo = divlabel[1].next_sibling.get_text()
        mySchool = self.school
        if mySchool in self.schoolInfo:
            name, sex, hometown = impRe(self.personalInfo)
            self.nameList = self.nameList + [
                (name, 0, inputid, time_now, sex, hometown)
            ]
Example #19
0
    def run(self):
        url = "http://www.zhihu.com/people/" + self.zh_username + "/topics"
        content = get_content(url, 1)
        time_now = time.time()
        if content == "FAIL":
            print "Fail to open the url:" + url
            return
        else:
            soup = BeautifulSoup(content)
            results = soup.findAll('div', attrs={'class':'zm-profile-section-main'})
            if results == None or len(results) == 0:
                print "No topic is found!"
                return
            for result in results:
                m = re.search(r'topic/(\d*)', str(result))
                if m == None:
                    print "No matching topic"
                    continue
                else:
                    link_id = m.group(1)

                m = re.search(r'<strong>(.*?)</strong>', str(result))
                if m == None:
                    print "No topic name!"
                    continue
                else:
                    name = m.group(1)
                print link_id, name
                sqlcmd = "INSERT IGNORE INTO TOPIC (NAME, LAST_VISIT, LINK_ID, ADD_TIME) VALUES (%s, %s, %s, %s)" 
                self.cursor.execute(sqlcmd, (name, 0, link_id, time_now))
Example #20
0
def handle_main_housenr_percent(
        relation: areas.Relation) -> Tuple[yattag.doc.Doc, str]:
    """Handles the house number percent part of the main page.

    Returns the rendered cell and the percent value ("0" when no cached
    percent file exists yet).
    """
    prefix = config.Config.get_uri_prefix()
    url = "%s/missing-housenumbers/%s/view-result" % (prefix, relation.get_name())
    percent_path = relation.get_files().get_housenumbers_percent_path()
    percent = "N/A"
    if os.path.exists(percent_path):
        percent = util.get_content(percent_path).decode("utf-8")

    doc = yattag.doc.Doc()
    if percent == "N/A":
        # No cached percent yet: just link to the page that computes it.
        with doc.tag("strong"):
            with doc.tag("a", href=url):
                doc.text(_("missing house numbers"))
        return doc, "0"

    date = get_last_modified(percent_path)
    with doc.tag("strong"):
        with doc.tag("a", href=url, title=_("updated") + " " + date):
            doc.text(util.format_percent(percent))
    return doc, percent
Example #21
0
def handle_main_street_percent(
        ctx: context.Context,
        relation: areas.Relation) -> Tuple[yattag.doc.Doc, str]:
    """Handles the street percent part of the main page.

    Returns the rendered cell and the percent value ("0" when no cached
    percent file exists yet).
    """
    prefix = ctx.get_ini().get_uri_prefix()
    url = "%s/missing-streets/%s/view-result" % (prefix, relation.get_name())
    percent_path = relation.get_files().get_streets_percent_path()
    percent = "N/A"
    if ctx.get_file_system().path_exists(percent_path):
        percent = util.get_content(percent_path).decode("utf-8")

    doc = yattag.doc.Doc()
    if percent == "N/A":
        # No cached percent yet: just link to the page that computes it.
        with doc.tag("strong"):
            with doc.tag("a", href=url):
                doc.text(tr("missing streets"))
        return doc, "0"

    date = get_last_modified(percent_path)
    with doc.tag("strong"):
        with doc.tag("a", href=url, title=tr("updated") + " " + date):
            doc.text(util.format_percent(percent))
    return doc, percent
Example #22
0
def handle_main_housenr_additional_count(
        ctx: context.Context, relation: areas.Relation) -> yattag.doc.Doc:
    """Handles the housenumber additional count part of the main page.

    Returns an empty document when the relation opts out of additional
    housenumber checks.
    """
    if not relation.get_config().should_check_additional_housenumbers():
        return yattag.doc.Doc()

    prefix = ctx.get_ini().get_uri_prefix()
    url = "%s/additional-housenumbers/%s/view-result" % (prefix, relation.get_name())
    count_path = relation.get_files().get_housenumbers_additional_count_path()
    additional_count = ""
    if ctx.get_file_system().path_exists(count_path):
        additional_count = util.get_content(count_path).decode("utf-8").strip()

    doc = yattag.doc.Doc()
    if not additional_count:
        # No cached count yet: just link to the page that computes it.
        with doc.tag("strong"):
            with doc.tag("a", href=url):
                doc.text(tr("additional house numbers"))
        return doc

    date = get_last_modified(count_path)
    with doc.tag("strong"):
        with doc.tag("a", href=url, title=tr("updated") + " " + date):
            doc.text(tr("{} house numbers").format(additional_count))
    return doc
Example #23
0
def update_stats() -> None:
    """Performs the update of country-level stats.

    Runs the whole-country overpass query (with bounded retries on HTTP
    errors), writes today's CSV under workdir/stats, then invokes the
    stats-daily.sh post-processing script.
    """

    # Fetch house numbers for the whole country.
    logging.info("update_stats: start, updating whole-country csv")
    query = util.get_content(
        config.get_abspath("data/street-housenumbers-hungary.txt"))
    statedir = config.get_abspath("workdir/stats")
    os.makedirs(statedir, exist_ok=True)
    # One CSV per day, named after today's date.
    today = time.strftime("%Y-%m-%d")
    csv_path = os.path.join(statedir, "%s.csv" % today)

    retry = 0
    while should_retry(retry):
        if retry > 0:
            logging.info("update_stats: try #%s", retry)
        retry += 1
        try:
            overpass_sleep()
            response = overpass_query.overpass_query(query)
            with open(csv_path, "w") as stream:
                stream.write(response)
            break
        except urllib.error.HTTPError as http_error:
            # Transient overpass failure: log it and let should_retry()
            # decide whether to attempt again.
            logging.info("update_stats: http error: %s", str(http_error))

    # Shell part.
    logging.info("update_stats: executing the shell part")
    subprocess.run([config.get_abspath("stats-daily.sh")], check=True)

    logging.info("update_stats: end")
Example #24
0
    def find_question_by_link(self, topic_url, count_id):
        """Scrapes question links from a topic page and bulk-inserts them
        into the QUESTION table.

        Returns the number of rows affected, or 0 when the fetch failed.
        """
        content = get_content(topic_url, count_id)

        if content == "FAIL":
            return 0

        soup = BeautifulSoup(content)

        questions = soup.findAll('a', attrs={'class': 'question_link'})

        p_str = 'INSERT IGNORE INTO QUESTION (NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s)'
        answer_rows = []
        time_now = int(time.time())

        for question in questions:
            tem_text = question.get_text()
            tem_id = question.get('href').replace('/question/', '')

            # append is O(1); the old `list + [row]` rebuilt the list on
            # every iteration (quadratic).  Also dropped the unused `i`.
            answer_rows.append((tem_text, int(tem_id), 0, 0, 0, time_now, 0))

        self.cursor.executemany(p_str, answer_rows)

        return self.cursor.rowcount
Example #25
0
    def find_question_by_link(self, topic_url, count_id):
        content = get_content(topic_url, count_id)

        if content == "FAIL":
            return 0
        # print content
        soup = BeautifulSoup(content)
        topic = soup.find("title").text
        topic = topic.replace(u" - 全部问题 - 知乎", "")
        print topic
        questions = soup.findAll("a", attrs={"class": "question_link"})

        i = 0
        p_str = "INSERT IGNORE INTO QUESTION (TOPIC, NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
        anser_list = []
        time_now = int(time.time())

        for question in questions:
            tem_text = question.get_text()
            tem_id = question.get("href").replace("/question/", "")

            anser_list = anser_list + [(topic, tem_text, int(tem_id), 0, 0, 0, time_now, 0)]

        self.cursor.executemany(p_str, anser_list)

        return self.cursor.rowcount
Example #26
0
def handle_main_street_additional_count(
        relation: areas.Relation) -> yattag.doc.Doc:
    """Handles the street additional count part of the main page.

    Links to the additional-streets page, decorated with the cached count
    and last-modified date when a count file exists.
    """
    prefix = config.Config.get_uri_prefix()
    url = "%s/additional-streets/%s/view-result" % (prefix, relation.get_name())
    count_path = relation.get_files().get_streets_additional_count_path()
    additional_count = ""
    if os.path.exists(count_path):
        additional_count = util.get_content(count_path).decode("utf-8")

    doc = yattag.doc.Doc()
    if not additional_count:
        # No cached count yet: just link to the page that computes it.
        with doc.tag("strong"):
            with doc.tag("a", href=url):
                doc.text(_("additional streets"))
        return doc

    date = get_last_modified(count_path)
    with doc.tag("strong"):
        with doc.tag("a", href=url, title=_("updated") + " " + date):
            doc.text(_("{} streets").format(additional_count))
    return doc
Example #27
0
    def find_question_by_link(self, topic_url, count_id):
        """Scrapes question links from a topic page and bulk-inserts them
        into the QUESTION table.

        Returns the number of rows affected, or 0 when the fetch failed.
        """
        content = get_content(topic_url, count_id)

        if content == "FAIL":
            return 0

        # NOTE(review): `beautifulsoup4` is an unusual parser callable —
        # confirm it is the intended alias for BeautifulSoup.
        soup = beautifulsoup4(content)

        questions = soup.findAll('a', attrs={'class': 'question_link'})

        p_str = 'INSERT IGNORE INTO QUESTION (NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER) VALUES (%s, %s, %s, %s, %s, %s, %s)'
        answer_rows = []
        time_now = int(time.time())

        for question in questions:
            tem_text = question.get_text()
            tem_id = question.get('href').replace('/question/', '')

            # append is O(1); the old `list + [row]` rebuilt the list on
            # every iteration (quadratic).  Also dropped the unused `i`.
            answer_rows.append((tem_text, int(tem_id), 0, 0, 0, time_now, 0))

        self.cursor.executemany(p_str, answer_rows)

        return self.cursor.rowcount
Example #28
0
 def test_happy(self) -> None:
     """Writing the overpass housenumber result stores the expected CSV."""
     relations = get_relations()
     workdir = relations.get_workdir()
     overpass_rows = [
         "******",
         "1\tTörökugrató utca\t1\n",
         "1\tTörökugrató utca\t2\n",
         "1\tTűzkő utca\t9\n",
         "1\tTűzkő utca\t10\n",
         "1\tOSM Name 1\t1\n",
         "1\tOSM Name 1\t2\n",
         "1\tOnly In OSM utca\t1\n",
     ]
     expected = util.get_content(workdir, "street-housenumbers-gazdagret.csv")
     relation = relations.get_relation("gazdagret")
     relation.get_files().write_osm_housenumbers("".join(overpass_rows))
     self.assertEqual(util.get_content(workdir, "street-housenumbers-gazdagret.csv"), expected)
Example #29
0
    def find_answers_by_question_url(self, question_url, count_id):
        """Scrapes a question page: stores the question row and up to 10
        answers (author + body) into the DB.

        Returns the affected row count of the last insert, or 0 when the
        page could not be fetched or has no answers.
        """
        content = get_content(question_url, count_id)
        if content == 'Fail':
            return 0
        content = BeautifulSoup(content, 'lxml')

        question_name = content.find(name='div', id='zh-question-title')
        if not question_name:
            return 0

        question_name = question_name.find('h2').get_text().strip()

        answer_num = content.find(name='h3', id='zh-question-answer-num')
        if not answer_num:
            return 0
        # data-num is a string: convert once.  The old code compared the
        # raw string against ints (`answer_num <= 0`) and could pass a str
        # limit to findAll(); both are fixed by working with an int.
        answer_num = int(answer_num.get('data-num'))
        if answer_num <= 0:
            return 0
        # Cap the number of answers fetched per question at 10.
        get_answer_num = min(answer_num, 10)

        answer_detail_list = []
        time_now = int(time.time())
        question_id = re.sub('.*/', '', question_url)
        question_list = [(question_name, int(question_id), 0, 0, 0, time_now, 0)]

        answers = content.findAll(name='div', attrs={'class': 'zm-item-answer'}, limit=get_answer_num)
        for answer in answers:
            answer_author_info = answer.find(name='h3', attrs={'class': 'zm-item-answer-author-wrap'}).findAll('a')
            if not answer_author_info:
                continue

            answer_author_id = answer_author_info[1].get('href').replace('/people/', '')
            answer_author_name = answer_author_info[1].get_text()

            # The answer body is everything inside the editable-content div.
            answer_detail = answer.find(name='div', attrs={'class': ' zm-editable-content clearfix'})
            answer_detail = str(answer_detail).replace('<div class=" zm-editable-content clearfix">', '').replace('</div>', '').strip()

            if not answer_detail:
                continue

            answer_detail_list.append(
                (answer_author_id, answer_author_name, int(question_id), answer_detail))

        # insert data to DB
        question_sql = 'insert ignore into question (name, link_id, focus, answer, last_visit, add_time, top_answer_number) ' \
                       'values (%s, %s, %s, %s, %s, %s, %s)'
        question__detail_sql = 'insert ignore into answer_detail (answer_author_id, answer_author_name, question_link_id,answer_detail)' \
                               ' values (%s, %s, %s, %s)'
        self.cursor.executemany(question_sql, question_list)
        self.cursor.executemany(question__detail_sql, answer_detail_list)
        self.db.commit()

        return self.cursor.rowcount
Example #30
0
def convert():
    """Convert the top posts of a subreddit into an emailed digest.

    Reads subreddit, time filter, post limit, comment options and the
    destination addresses from the submitted form, renders the posts into
    an HTML attachment and mails it.  Returns a JSON status payload.
    """
    # Fix: validate once and reuse the result (the original called the
    # validator twice, doing the work a second time just for the message).
    error = util.validate_request_subreddit(request.form)
    if error is not None:
        return jsonify(type='danger', text=error)

    subreddit = request.form['subreddit']
    include_comments = request.form['comments']
    # Renamed from `time` to avoid shadowing the stdlib time module.
    time_filter = request.form['time']
    limit = int(request.form['limit'])
    address = request.form['email']
    kindle_address = request.form['kindle_address']

    try:
        posts = util.get_posts(subreddit, time_filter, limit)
        if time_filter == 'all':
            title = 'Top ' + str(
                limit) + ' posts from /r/' + subreddit + ' ever'
        else:
            title = 'Top ' + str(
                limit
            ) + ' posts from /r/' + subreddit + ' over the past ' + time_filter
        top = []
        for post in posts:
            author = '[deleted]' if post.author is None else post.author.name
            comments = None
            if include_comments == 'true':
                post.comments.replace_more(limit=0)
                comments = util.get_comments(post,
                                             request.form['comments_style'],
                                             author)
            try:
                top.append({
                    'title':
                    post.title,
                    # Self posts render their markdown; link posts fetch
                    # the linked page's content.
                    'body':
                    util.get_content(post.url)
                    if not post.url.startswith('https://www.reddit.com/r/')
                    else util.markdown(post.selftext),
                    'author':
                    author,
                    'comments':
                    comments
                })
            except Exception:
                # Fix: narrowed from a bare except; best-effort skip of
                # posts whose body cannot be fetched or rendered.
                pass
    except Exception:
        # Fix: narrowed from a bare except so Ctrl-C / SystemExit still work.
        return jsonify(type='danger',
                       text='That ain\'t no subreddit I\'ve ever heard of!')

    attachment = render_template('posts.html', posts=top, title=title)

    status = util.send_email(address, kindle_address, attachment, title)

    if status is None:
        return jsonify(type='success', text='Success!')
    else:
        return jsonify(type='warning',
                       text='Uh oh! Something went wrong on our end')
Example #31
0
    def test_happy(self) -> None:
        """Happy path: OSM housenumbers are fetched via overpass and written."""
        sleep_calls = []

        def fake_overpass_sleep() -> None:
            # Record each rate-limiting sleep so we can assert it happened.
            sleep_calls.append(True)

        overpass_csv = "******"
        overpass_csv += "1\tTörökugrató utca\t1\n"
        overpass_csv += "1\tTörökugrató utca\t2\n"
        overpass_csv += "1\tTűzkő utca\t9\n"
        overpass_csv += "1\tTűzkő utca\t10\n"
        overpass_csv += "1\tOSM Name 1\t1\n"
        overpass_csv += "1\tOSM Name 1\t2\n"
        overpass_csv += "1\tOnly In OSM utca\t1\n"

        def fake_urlopen(_url: str, _data: Optional[bytes] = None) -> BinaryIO:
            # Serve the canned overpass payload instead of hitting the network.
            return io.BytesIO(overpass_csv.encode('utf-8'))

        with unittest.mock.patch('util.get_abspath', get_abspath), \
                unittest.mock.patch("cron.overpass_sleep",
                                    fake_overpass_sleep), \
                unittest.mock.patch('urllib.request.urlopen', fake_urlopen):
            relations = get_relations()
            # Deactivate everything except the relation under test.
            for name in relations.get_active_names():
                if name != "gazdagret":
                    relations.get_relation(name).get_config().set_active(False)
            expected = util.get_content(relations.get_workdir(),
                                        "street-housenumbers-gazdagret.csv")
            os.unlink(os.path.join(relations.get_workdir(),
                                   "street-housenumbers-gazdagret.csv"))
            cron.update_osm_housenumbers(relations)
            self.assertTrue(sleep_calls)
            # The deleted file must be regenerated with identical content.
            actual = util.get_content(relations.get_workdir(),
                                      "street-housenumbers-gazdagret.csv")
            self.assertEqual(actual, expected)
Example #32
0
 def test_happy(self) -> None:
     """Happy path: the housenumber reference list is rebuilt on demand."""
     relations = get_relations()
     # Keep only the two relations this test cares about active.
     for name in relations.get_active_names():
         if name not in ("gazdagret", "ujbuda"):
             relations.get_relation(name).get_config().set_active(False)
     workdir = relations.get_workdir()
     path = os.path.join(workdir, "street-housenumbers-reference-gazdagret.lst")
     expected = util.get_content(path)
     os.unlink(path)
     cron.update_ref_housenumbers(relations, update=True)
     mtime = os.path.getmtime(path)
     # A second run without update must leave the file untouched.
     cron.update_ref_housenumbers(relations, update=False)
     self.assertEqual(os.path.getmtime(path), mtime)
     self.assertEqual(util.get_content(path), expected)
     # Make sure housenumber ref is not created for the streets=only case.
     self.assertFalse(os.path.exists(
         os.path.join(workdir, "street-housenumbers-reference-ujbuda.lst")))
Example #33
0
 def test_happy(self) -> None:
     """Happy path: the missing-housenumbers percent file is regenerated."""
     with unittest.mock.patch('config.get_abspath', get_abspath):
         relations = get_relations()
         for name in relations.get_active_names():
             # ujbuda is streets=only; keep it active for the negative check.
             if name not in ("gazdagret", "ujbuda"):
                 relations.get_relation(name).get_config().set_active(False)
         workdir = relations.get_workdir()
         path = os.path.join(workdir, "gazdagret.percent")
         expected = util.get_content(path)
         os.unlink(path)
         cron.update_missing_housenumbers(relations, update=True)
         mtime = os.path.getmtime(path)
         # Without update the existing file must not be rewritten.
         cron.update_missing_housenumbers(relations, update=False)
         self.assertEqual(os.path.getmtime(path), mtime)
         self.assertEqual(util.get_content(path), expected)
         # Make sure housenumber stat is not created for the streets=only case.
         self.assertFalse(os.path.exists(os.path.join(workdir, "ujbuda.percent")))
Example #34
0
 def test_happy(self) -> None:
     """Happy path: write_missing_housenumbers reports the expected stats."""
     relations = get_relations()
     relation = relations.get_relation("gazdagret")
     expected = util.get_content(relations.get_workdir(), "gazdagret.percent")
     # Unpack the stats tuple directly from the call.
     todo_street_count, todo_count, done_count, percent, table = \
         relation.write_missing_housenumbers()
     self.assertEqual(todo_street_count, 3)
     self.assertEqual(todo_count, 5)
     self.assertEqual(done_count, 6)
     self.assertEqual(percent, '54.55')
     self.assertEqual(table_doc_to_string(table),
                      [['Street name', 'Missing count', 'House numbers'],
                       ['Törökugrató utca', '2', '7<br />10'],
                       ['Tűzkő utca', '2', '1<br />2'],
                       ['Hamzsabégi út', '1', '1']])
     # The percent file is rewritten with unchanged content.
     self.assertEqual(util.get_content(relations.get_workdir(), "gazdagret.percent"),
                      expected)
Example #35
0
 def test_happy(self) -> None:
     """Happy path: the street reference list is rebuilt on demand.

     Also checks that no street reference file is written for a
     streets=no relation.
     """
     with unittest.mock.patch('config.get_abspath', get_abspath):
         relations = get_relations()
         for relation_name in relations.get_active_names():
             # gellerthegy is streets=no
             if relation_name not in ("gazdagret", "gellerthegy"):
                 relations.get_relation(relation_name).get_config().set_active(False)
         path = os.path.join(relations.get_workdir(), "streets-reference-gazdagret.lst")
         expected = util.get_content(path)
         os.unlink(path)
         cron.update_ref_streets(relations, update=True)
         mtime = os.path.getmtime(path)
         cron.update_ref_streets(relations, update=False)
         self.assertEqual(os.path.getmtime(path), mtime)
         actual = util.get_content(path)
         self.assertEqual(actual, expected)
         # Make sure street ref is not created for the streets=no case.
         # Fix: this local was misleadingly named ujbuda_path even though it
         # points at the gellerthegy file.
         gellerthegy_path = os.path.join(relations.get_workdir(),
                                         "streets-reference-gellerthegy.lst")
         self.assertFalse(os.path.exists(gellerthegy_path))
Example #36
0
    def find_question_by_link(self, topic_url, count_id):
        """Crawl a topic page and scrape answers for every linked question.

        Returns the total number of rows inserted across all questions
        found on the page, or 0 when fetching the topic page failed.
        """
        content = get_content(topic_url, count_id)
        if content == "FAIL":
            return 0

        soup = BeautifulSoup(content, 'lxml')

        total = 0
        for anchor in soup.findAll('a', attrs={'class': 'question_link'}):
            href = anchor.get('href')
            if not href:
                continue
            # Relative link -> absolute question URL.
            total += self.find_answers_by_question_url(
                'http://www.zhihu.com' + href, count_id)
        return total
Example #37
0
def convert():
    """Convert the top posts of a subreddit into an emailed digest.

    Reads subreddit, time filter, post limit, comment options and the
    destination addresses from the submitted form, renders the posts into
    an HTML attachment and mails it.  Returns a JSON status payload.
    """
    # Fix: validate once and reuse the result (the original called the
    # validator twice, doing the work a second time just for the message).
    error = util.validate_request_subreddit(request.form)
    if error is not None:
        return jsonify(type='danger', text=error)

    subreddit = request.form['subreddit']
    include_comments = request.form['comments']
    # Renamed from `time` to avoid shadowing the stdlib time module.
    time_filter = request.form['time']
    limit = int(request.form['limit'])
    address = request.form['email']
    kindle_address = request.form['kindle_address']

    try:
        posts = util.get_posts(subreddit, time_filter, limit)
        if time_filter == 'all':
            title = 'Top ' + str(limit) + ' posts from /r/' + subreddit + ' ever'
        else:
            title = 'Top ' + str(limit) + ' posts from /r/' + subreddit + ' over the past ' + time_filter
        top = []
        for post in posts:
            author = '[deleted]' if post.author is None else post.author.name
            comments = None
            if include_comments == 'true':
                post.comments.replace_more(limit=0)
                comments = util.get_comments(post, request.form['comments_style'], author)
            try:
                # Self posts render their markdown; link posts fetch the
                # linked page's content.
                top.append({'title': post.title,
                            'body': util.get_content(post.url) if not post.url.startswith(
                                'https://www.reddit.com/r/') else util.markdown(
                                post.selftext),
                            'author': author,
                            'comments': comments})
            except Exception:
                # Fix: narrowed from a bare except; best-effort skip of
                # posts whose body cannot be fetched or rendered.
                pass
    except Exception:
        # Fix: narrowed from a bare except so Ctrl-C / SystemExit still work.
        return jsonify(type='danger', text='That ain\'t no subreddit I\'ve ever heard of!')

    attachment = render_template('posts.html', posts=top, title=title)

    status = util.send_email(address, kindle_address, attachment, title)

    if status is None:
        return jsonify(type='success', text='Success!')
    else:
        return jsonify(type='warning', text='Uh oh! Something went wrong on our end')
Example #38
0
    def find_question_by_link(self, topic_url, count_id):
        """Scan a topic feed page and bulk-insert the recent questions.

        Stops at the first feed item without a timestamp or whose
        timestamp mentions weeks/months/years (周/月/年), i.e. anything
        clearly older than a few days.  Returns the number of questions
        queued for insertion.
        """
        content = get_content(topic_url, count_id)
        if content == "FAIL":
            return 0

        soup = BeautifulSoup(content)

        p_str = 'INSERT IGNORE INTO QUESTION (NAME, LINK_ID, FOCUS, ANSWER, LAST_VISIT, ADD_TIME, TOP_ANSWER_NUMBER, ACTIVATE, REVIEW, FIRST_COMMENT) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
        rows = []
        time_now = int(time.time())

        for item in soup.findAll('div', attrs={'class': 'feed-item'}):
            # Do not add questions that are more than 5 days old.
            time_tag = item.find('span', attrs={'class': 'time'})
            if time_tag is None:
                break
            if re.search(u'周|月|年', time_tag.get_text()) is not None:
                break

            link = item.find('a', attrs={'class': 'question_link'})
            question_id = link.get('href').replace('/question/', '')
            rows.append((link.get_text(), int(question_id),
                         0, 0, 0, time_now, 0, 0, 0, 0))

        self.cursor.executemany(p_str, rows)

        return len(rows)
Example #39
0
from itertools import permutations
from util import is_prime, get_content


def solve(words):
    """Count the words whose letter-value sum is a triangle number.

    Letter value is A=1 .. Z=26 (``ord(ch) - 64``); triangle numbers
    ``t_i = i*(i+1)/2`` are precomputed up to the largest possible word
    value.  NOTE(review): Python 2 code (print statement, xrange).
    """
    max_length = max([len(word) for word in words])
    # Upper bound on any word's value: every letter being 'Z'.
    max_value = 26 * max_length
    trians = []
    for i in xrange(1, max_value):
        n = i * (i + 1) / 2
        if n > max_value:
            break
        trians.append(n)
    print trians
    c = 0
    for word in words:
        # NOTE(review): linear membership test on a list; a set would be
        # O(1) per lookup — left as-is for byte-identical behavior.
        if sum([ord(w)-64 for w in word]) in trians:
            c += 1

    return c


if __name__ == "__main__":
    import cProfile

    # Download the comma-separated, double-quoted word list and strip the
    # quotes and surrounding whitespace from each entry.
    text = get_content("http://projecteuler.net/project/words.txt")
    words = [word.replace('"', '').strip() for word in text.split(',')]

    # Profile the solver; the statement uses a Python 2 print.
    cProfile.run("print solve(words)")
Example #40
0
from util import get_content


def score_name(name):
    """Return the alphabetical value of *name*: A=1, B=2, ..., Z=26, summed."""
    return sum(ord(letter) - 64 for letter in name)


def solve(names):
    """Return the total of each name's alphabetical value times its 1-based rank."""
    total = 0
    rank = 1
    for name in names:
        # Alphabetical value: A=1 .. Z=26, weighted by list position.
        total += rank * sum(ord(ch) - 64 for ch in name)
        rank += 1
    return total


if __name__ == "__main__":
    import cProfile

    # Download the comma-separated, double-quoted name list and strip the
    # quotes and surrounding whitespace from each entry.
    text = get_content("http://projecteuler.net/project/names.txt")
    names = [name.replace('"', '').strip() for name in text.split(',')]
    # Scores are weighted by alphabetical rank, so sort first.
    names.sort()
    # Profile the solver; the statement uses a Python 2 print.
    cProfile.run('print solve(names)')
Example #41
0
    def update(self,link_id,count_id):
        """Refresh one QUESTION row with freshly scraped statistics.

        Fetches the question page, extracts focus / answer / review counts,
        the last-activation time, the top answer's vote count and the first
        comment date, updates the QUESTION row and inserts related topics.
        NOTE(review): Python 2 code (print statements).
        """
        time_now = int(time.time())
        questionUrl = 'http://www.zhihu.com/question/' + link_id

        content = get_content(questionUrl,count_id)
        if content == "FAIL":
            # Fetch failed: only bump the visit timestamp and give up.
            sql = "UPDATE QUESTION SET LAST_VISIT = %s WHERE LINK_ID = %s"
            self.cursor.execute(sql,(time_now,link_id))
            return

        soup = BeautifulSoup(content)

        # There are 3 numbers in this format
        # Focus, Last Activated and Review
        numbers = soup.findAll('div',attrs={'class':'zg-gray-normal'})

        if len(numbers) != 3:
            print "LINK_ID:" + link_id + "Does not have 3 numbers"
            return
        focus    = numbers[0]
        activate = numbers[1]
        review   = numbers[2]
        # Find out how many people focus this question.
        m = re.search(r'<strong>(.*?)</strong>', str(focus))
        if m == None:
            focus_amount = '0'
        else:
            focus_amount = m.group(1)
        # Find out when is this question last activated
        m = re.search(r'>(.*?)<', str(activate))
        if m == None:
            activate_time = u'Unknown'
        else:
            activate_time = get_time(m.group(1))
        # Find out how many people reviewed this question
        m = re.search(r'<strong>(.*?)</strong>', str(review))
        if m == None:
            review_amount = '0'
        else:
            review_amount = m.group(1)

        # Find out how many people answered this question.
        # Falls back to probing for a single answer block when the
        # answer-count header is absent.
        answer_amount = soup.find('h3',attrs={'id':'zh-question-answer-num'})
        if answer_amount != None:
            answer_amount = answer_amount.get_text().replace(u' 个回答','')
        else:
            answer_amount = soup.find('div',attrs={'class':'zm-item-answer'})
            if answer_amount != None:
                answer_amount = u'1'
            else:
                answer_amount = u'0'

        # Find out the top answer's vote amount.
        # 'K' suffixes are expanded (e.g. "2K" -> 2000) before comparison.
        top_answer = soup.findAll('span',attrs={'class':'count'})
        if top_answer == []:
            top_answer_votes = 0
        else:
            top_answer_votes = 0
            for t in top_answer:
                t = t.get_text()
                t = t.replace('K','000')
                t = int(t)
                if t > top_answer_votes:
                    top_answer_votes = t

        # Find out the first commend date.
        comment_dates = soup.findAll('a',class_="answer-date-link")
        if comment_dates == []:
            first_comment_time = 0
        else:
            times = map(get_time, comment_dates)
            first_comment_time = min(times)

        # print it to check if everything is good.
        # NOTE(review): count_id % 1 == 0 is always true, so this prints on
        # every call — confirm whether a larger modulus was intended.
        if count_id % 1 == 0:
            print str(count_id) + " , " + self.getName() + " Update QUESTION set FOCUS = " + focus_amount + " , ANSWER = " + answer_amount + ", LAST_VISIT = " + str(time_now) + ", TOP_ANSWER_NUMBER = " + str(top_answer_votes) + " where LINK_ID = " + link_id
        #print str(count_id) + " , " + self.getName() + " Update QUESTION set FOCUS = " + focus_amount + " , ANSWER = " + answer_amount + ", LAST_VISIT = " + str(time_now) + ", TOP_ANSWER_NUMBER = " + str(top_answer_votes) + " where LINK_ID = " + link_id

        # Update this question
        sql = "UPDATE QUESTION SET FOCUS = %s , ANSWER = %s, LAST_VISIT = %s, TOP_ANSWER_NUMBER = %s , ACTIVATE = %s, REVIEW = %s , FIRST_COMMENT = %s WHERE LINK_ID = %s"
        self.cursor.execute(sql,(focus_amount,answer_amount,time_now,top_answer_votes,activate_time, review_amount, first_comment_time, link_id))

        # Find out the topics related to this question
        topics = soup.findAll('a',attrs={'class':'zm-item-tag'})
        sql_str = "INSERT IGNORE INTO TOPIC (NAME, LAST_VISIT, LINK_ID, ADD_TIME, PRIORITY) VALUES (%s, %s, %s, %s, %s)"
        topicList = []
        for topic in topics:
            topicName = topic.get_text().replace('\n','')
            topicUrl = topic.get('href').replace('/topic/','')
            #sql_str = sql_str + "('" + topicName + "',0," + topicUrl + "," + str(time_now) + "),"
            topicList = topicList + [(topicName, 0, topicUrl, time_now, 0)]

        # NOTE(review): no commit here — presumably the caller commits; verify.
        self.cursor.executemany(sql_str,topicList)
Example #42
0
    def update(self,link_id,count_id):
        """Refresh one QUESTION row (older variant without ACTIVATE/REVIEW).

        Fetches the question page, extracts the focus count, answer count
        and top answer votes, updates the QUESTION row and inserts related
        topics.  NOTE(review): Python 2 code (print statements).
        """
        time_now = int(time.time())
        questionUrl = 'http://www.zhihu.com/question/' + link_id

        content = get_content(questionUrl,count_id)
        if content == "FAIL":
            # Fetch failed: only bump the visit timestamp and give up.
            sql = "UPDATE QUESTION SET LAST_VISIT = %s WHERE LINK_ID = %s"
            self.cursor.execute(sql,(time_now,link_id))
            return

        soup = BeautifulSoup(content)

        questions = soup.find('div',attrs={'class':'zg-gray-normal'})

        # Find out how many people focus this question.
        # The label text is stripped down to the bare number.
        if questions == None:
            return
        else:
            focus_amount = questions.getText().replace('\n','')
            focus_amount = focus_amount.replace(u'人关注该问题','')
            focus_amount = focus_amount.replace(u'关注','')

            if focus_amount == u'问题还没有':
                focus_amount = u'0'

        focus_amount = focus_amount.replace(u'问题','')

        if focus_amount == u'\\xe8\\xbf\\x98\\xe6\\xb2\\xa1\\xe6\\x9c\\x89':  # This is a special case.
            return

        # Find out how many people answered this question.
        # Falls back to probing for a single answer block when the
        # answer-count header is absent.
        answer_amount = soup.find('h3',attrs={'id':'zh-question-answer-num'})
        if answer_amount != None:
            answer_amount = answer_amount.getText().replace(u' 个回答','')
        else:
            answer_amount = soup.find('div',attrs={'class':'zm-item-answer'})
            if answer_amount != None:
                answer_amount = u'1'
            else:
                answer_amount = u'0'

        # Find out the top answer's vote amount.
        # 'K' suffixes are expanded (e.g. "2K" -> 2000) before comparison.
        top_answer = soup.findAll('span',attrs={'class':'count'})
        if top_answer == []:
            top_answer_votes = 0
        else:
            top_answer_votes = 0
            for t in top_answer:
                t = t.getText()
                t = t.replace('K','000')
                t = int(t)
                if t > top_answer_votes:
                    top_answer_votes = t

        # print it to check if everything is good.
        # NOTE(review): count_id % 1 == 0 is always true, so this prints on
        # every call — confirm whether a larger modulus was intended.
        if count_id % 1 == 0:
            print str(count_id) + " , " + self.getName() + " Update QUESTION set FOCUS = " + focus_amount + " , ANSWER = " + answer_amount + ", LAST_VISIT = " + str(time_now) + ", TOP_ANSWER_NUMBER = " + str(top_answer_votes) + " where LINK_ID = " + link_id
        #print str(count_id) + " , " + self.getName() + " Update QUESTION set FOCUS = " + focus_amount + " , ANSWER = " + answer_amount + ", LAST_VISIT = " + str(time_now) + ", TOP_ANSWER_NUMBER = " + str(top_answer_votes) + " where LINK_ID = " + link_id

        # Update this question
        sql = "UPDATE QUESTION SET FOCUS = %s , ANSWER = %s, LAST_VISIT = %s, TOP_ANSWER_NUMBER = %s WHERE LINK_ID = %s"
        self.cursor.execute(sql,(focus_amount,answer_amount,time_now,top_answer_votes,link_id))

        # Find out the topics related to this question
        topics = soup.findAll('a',attrs={'class':'zm-item-tag'})
        # NOTE(review): questions is always non-None here (we returned above
        # when it was None), so this guard is redundant.
        if questions != None:
            sql_str = "INSERT IGNORE INTO TOPIC (NAME, LAST_VISIT, LINK_ID, ADD_TIME) VALUES (%s, %s, %s, %s)"
            topicList = []
            for topic in topics:
                topicName = topic.getText().replace('\n','')
                topicUrl = topic.get('href').replace('/topic/','')
                #sql_str = sql_str + "('" + topicName + "',0," + topicUrl + "," + str(time_now) + "),"
                topicList = topicList + [(topicName, 0, topicUrl, time_now)]

            self.cursor.executemany(sql_str,topicList)
Example #43
0
            continue
        random_recipients = []
        all_recipients = eggz.get('recipients')
        for i in range(0, SEND_LIMIT_PER_USER):
            random_recipients.append(random.choice(all_recipients))
        for recipient in random_recipients:
            c.send_mail(smtp, recipient)
        smtp.close()


if __name__ == "__main__":
    queue = Queue()
    pool = Pool(PROCESSES, worker, (queue,))
    credentials = get_credentials()
    subject = get_subject()
    content = get_content()
    recipients = get_recipients()
    attachments = get_attachments()

    for x in range(0, HOW_MANY_LOGINS):
        credential = random.choice(credentials)
        args = {'smtp_server': credential.get('smtp_server'),
                'port': credential.get('port'),
                'username': credential.get('username'),
                'password': credential.get('password'),
                'ssl': credential.get('ssl'),
                'starttls': credential.get('starttls'),
                'subject': subject,
                'content': content,
                'recipients': recipients,
                'attachments': attachments}