Beispiel #1
0
    def usage(self, show_usage='text', error_msg=None):
        """Show the title and usage information and, for most modes, exit.

        Args:
            show_usage: Output mode. ``None`` only shows the title (unless
                ``self.quiet`` is set) and returns. ``'desc'`` prints
                ``self.doc`` and returns. Any other value is handed to
                ``utils.format_text`` as the output format, after which the
                process exits.
            error_msg: Optional iterable of messages. When truthy, each
                message is logged with ``log.error`` and the exit status
                is 1 instead of 0.

        Raises:
            SystemExit: For every mode other than ``None`` and ``'desc'``.
        """
        if show_usage is None:
            if not self.quiet:
                self.showTitle()
            return

        if show_usage == 'text':
            self.showTitle()
            log.info()

        if show_usage == 'desc':
            # Parenthesised so the statement parses on Python 2 and Python 3.
            print(self.doc)
            return

        utils.format_text(self.usage_data, show_usage, self.title,
                          self.mod, self.version)

        if error_msg:
            for e in error_msg:
                log.error(e)
            sys.exit(1)

        # The former trailing "if show_usage == 'text': sys.exit(0)" could
        # never run because this exit is unconditional.
        sys.exit(0)
Beispiel #2
0
    def usage(self, show_usage='text', error_msg=None):
        """Show the title and usage information and, for most modes, exit.

        Args:
            show_usage: Output mode. ``None`` only shows the title (unless
                ``self.quiet`` is set) and returns. ``'desc'`` prints
                ``self.doc`` and returns. Any other value is handed to
                ``utils.format_text`` as the output format, after which the
                process exits.
            error_msg: Optional iterable of messages. When truthy, each
                message is logged with ``log.error`` and the exit status
                is 1 instead of 0.

        Raises:
            SystemExit: For every mode other than ``None`` and ``'desc'``.
        """
        if show_usage is None:
            if not self.quiet:
                self.showTitle()
            return

        if show_usage == 'text':
            self.showTitle()
            log.info()

        if show_usage == 'desc':
            # Parenthesised so the statement parses on Python 2 and Python 3.
            print(self.doc)
            return

        utils.format_text(self.usage_data, show_usage, self.title, self.mod,
                          self.version)

        if error_msg:
            for e in error_msg:
                log.error(e)
            sys.exit(1)

        # The former trailing "if show_usage == 'text': sys.exit(0)" could
        # never run because this exit is unconditional.
        sys.exit(0)
Beispiel #3
0
def get_main_keyboard(user_search: bool, lang: str = 'ru'):
    """Build the main reply keyboard with two buttons.

    The first button's label key depends on whether ``user_search`` is
    exactly ``True``; the second button is always the settings button.
    Both labels are resolved through ``format_text`` for ``lang``.
    """
    markup = ReplyKeyboardMarkup(resize_keyboard=True)
    # Identity check on purpose: only the literal True picks the "true" key.
    if user_search is True:
        search_key = 'btn finding job true'
    else:
        search_key = 'btn finding job false'
    for label_key in (search_key, 'btn settings'):
        markup.insert(KeyboardButton(text=format_text(label_key, lang)))
    return markup
Beispiel #4
0
def run():
    """Prompt for search queries until an empty one is entered.

    Every query is read from stdin, passed through ``format_text`` and then
    to ``keywords.match``. Matches are printed one per line as
    ``[index] title``; an empty result prints an apology instead.
    """
    while True:
        query = format_text(input("Search: "))
        if query == "":
            break

        found = keywords.match(query)
        if len(found["indexes"]) == 0:
            print("Sorry, the course was not found in the list")
            continue

        rows = []
        for index, title in zip(found["indexes"], found["titles"]):
            rows.append("[{}] {}".format(index, title))
        print("\n".join(rows))
Beispiel #5
0
async def cancel_search(message: types.Message, state: FSMContext):
    """Cancel the current search when the user sends the cancel button text.

    Does nothing unless ``message.text`` equals ``format_text('btn_cancel')``.
    Otherwise the FSM state is finished, every message id stored under
    ``delete_msg`` in the state data is deleted on a best-effort basis, and
    a confirmation with the main keyboard is sent.
    """
    # NOTE(review): this key uses an underscore while the inline keyboard
    # in this project uses 'btn cancel' -- confirm both keys exist.
    if message.text != format_text('btn_cancel'):
        return

    user = await get_user(message.chat.id)
    data = await state.get_data()
    # A missing "delete_msg" entry simply means there is nothing to clean up.
    delete_msg = data.get("delete_msg") or []
    await state.finish()
    for msg in delete_msg:
        try:
            await bot.delete_message(message.chat.id, msg)
        except Exception:
            # Deletion stays best-effort, but unlike a bare ``except`` this
            # lets asyncio.CancelledError / KeyboardInterrupt propagate.
            continue
    await message.answer(format_text('msg cancel search'),
                         reply_markup=get_main_keyboard(user[5]))
Beispiel #6
0
    def answer_tweets(self):
        """Reply to the mentions returned by the API.

        Fetches mentions newer than ``self.last_tweet`` and stores the id of
        the last entry of that result back into ``self.last_tweet``. Each
        mention is re-fetched in extended mode, checked with
        ``utils.verify_tweet`` and answered with a status built by
        ``utils.format_to_status_text``. The id of the latest reply posted
        is kept in ``self.last_own_tweet``.

        Returns:
            ``0`` as soon as one mention fails verification (the remaining
            mentions are then not processed); ``None`` otherwise.
        """
        mentions = self.api.mentions_timeline(since_id=self.last_tweet)
        if len(mentions) > 0:
            self.last_tweet = mentions[-1].id
            print(self.last_tweet)

        separator = "------------------------"
        for mention in mentions:
            # Re-fetch so that ``full_text`` is available on the status.
            status = self.api.get_status(mention.id, tweet_mode="extended")

            author = status.user
            tags = status.entities["hashtags"]
            body = utils.format_text(status.full_text, self.hashtag)
            screen_name = author.screen_name
            reply_to = status.id

            # NOTE(review): this returns from the whole method, so one
            # unverified mention stops all later ones -- confirm that
            # ``continue`` was not intended.
            if not utils.verify_tweet(author, tags, self.hashtag):
                return 0

            print("Helping {}!".format(screen_name))
            reply_text = utils.format_to_status_text(screen_name, body)
            print(separator)
            print(reply_text)
            print(separator)

            posted = self.api.update_status(
                status=reply_text,
                in_reply_to_status_id=reply_to,
                auto_populate_reply_metadata=True)
            self.last_own_tweet = posted.id
Beispiel #7
0
def get_keyword_keyboard():
    """Build the inline keyboard with search, retype and cancel buttons.

    Each button sits on its own row; labels are resolved with
    ``format_text`` and the callback data values are ``start``, ``retype``
    and ``cancel``.
    """
    layout = (
        ('btn keyword search', 'start'),
        ('btn keyword retype', 'retype'),
        ('btn cancel', 'cancel'),
    )
    rows = [
        [InlineKeyboardButton(text=format_text(label_key),
                              callback_data=action)]
        for label_key, action in layout
    ]
    return InlineKeyboardMarkup(inline_keyboard=rows)
Beispiel #8
0
def callback(bot: telegram.Bot, update: telegram.Update):
    """Re-send a media message through the bot and delete the original.

    The first attachment present among photo (last entry of the photo
    list), video and document is sent back to the same chat with a caption
    produced by ``format_text``. If none is present the function returns
    without doing anything. Afterwards a ``Message`` row is created and the
    user's message is deleted; any exception in that step is logged.
    """
    if settings.DEBUG:
        logger.debug('update: %s', update)

    markup = get_reply_markup()
    caption = format_text(update.message.from_user, update.message.caption)
    common = {
        'chat_id': update.message.chat_id,
        'reply_markup': markup,
        'caption': caption,
    }

    if update.message.photo:
        file_id = update.message.photo[-1].file_id
        sent = bot.send_photo(**common, photo=file_id)
    elif update.message.video:
        sent = bot.send_video(**common, video=update.message.video.file_id)
    elif update.message.document:
        file_id = update.message.document.file_id
        sent = bot.send_document(**common, document=file_id)
    else:
        # Nothing this handler knows how to forward.
        return

    try:
        with db:
            Message.create(message_tg_id=sent.message_id,
                           chat_tg_id=update.message.chat_id,
                           user_tg_id=update.message.from_user.id)
        bot.delete_message(update.message.chat_id, update.message.message_id)
    except Exception:
        logger.exception('exception while adding message %s', update)
Beispiel #9
0
async def bot_start(message: types.Message):
    """Handle the start message.

    Passes the sender's full name, id and username to ``add_user``, loads
    the user with ``get_user`` and answers with the 'msg start' text plus
    the main keyboard built from ``user[5]``.
    """
    sender = message.from_user
    await add_user(sender.full_name, sender.id, sender.username)
    user = await get_user(sender.id)

    greeting = format_text('msg start', 'ru', full_name=sender.full_name)
    keyboard = get_main_keyboard(user[5])
    await message.answer(greeting, reply_markup=keyboard)
Beispiel #10
0
async def set_keyword(message: types.Message, state: FSMContext):
    """Store the entered keyword and show it back with the keyword keyboard.

    The message text is saved in the state data under ``keyword``, the
    ``Search`` state group is advanced, and both the user's message id and
    the bot's reply id are appended to ``delete_msg`` in the state data.
    """
    keyword = message.text
    await state.update_data(keyword=keyword)
    await Search.next()

    prompt = format_text('msg show keyword', keyword=keyword)
    reply = await message.answer(prompt, reply_markup=get_keyword_keyboard())

    async with state.proxy() as data:
        for message_id in (message.message_id, reply.message_id):
            data['delete_msg'].append(message_id)
Beispiel #11
0
def update():
    """Add a keyword entry for each course title that has none yet.

    Course titles are passed through ``format_text``; a title that is not
    yet a key of the keyword mapping gets a new entry holding the course id
    as a single-element list. The mapping is written back at the end.
    """
    courses = get()
    known = keywords.get()

    for course_id, course in courses.items():
        title_key = format_text(course["title"])
        if title_key in known:
            continue
        known[title_key] = [int(course_id)]

    keywords.write(known)
Beispiel #12
0
 def get_sentences_with_word(self, word: str):
     """Return the formatted context rows stored for ``word``.

     Runs a parameterised query on the ``terms`` table for rows whose
     ``base_form`` equals ``word``. Each row goes through
     ``self.parse_value``, is converted with ``str`` and then passed to
     ``utils.format_text``; the results are returned as a list.
     """
     sql = ("SELECT context1_body, context2_body, context3_body "
            " FROM terms "
            " WHERE base_form = %s ")
     self.cur.execute(sql, (word, ))
     return [
         utils.format_text(str(self.parse_value(row)))
         for row in self.cur.fetchall()
     ]
Beispiel #13
0
async def show_vacancy(data: dict):
    """Render a vacancy dict into the 'msg show vacancy' text.

    Reads ``title``, ``company``, ``city``, ``salary``, ``description``,
    ``created_onsite_at`` and ``url`` from ``data``; the URL is wrapped in
    an HTML anchor whose label comes from ``format_text("msg link")``.
    """
    # Keyword name expected by the template -> key to read from ``data``.
    sources = (
        ('title', 'title'),
        ('company', 'company'),
        ('city', 'city'),
        ('salary', 'salary'),
        ('description', 'description'),
        ('created', 'created_onsite_at'),
    )
    fields = {name: data[key] for name, key in sources}
    fields['url'] = f'<a href="{data["url"]}">{format_text("msg link")}</a>'
    return format_text('msg show vacancy', **fields)
def run():
    """Interactively toggle keywords on individual courses.

    Prompts for a search query until an empty one is entered. When a query
    matches exactly one course, its current keywords are printed and every
    further non-empty input line toggles that keyword for the course
    (removed if already attached, added otherwise). The keyword mapping is
    written back once the outer loop ends.
    """
    kw = keywords.get()

    while True:
        query = format_text(input("Search: "))
        if query == "":
            break

        found = keywords.match(query)
        hits = len(found["indexes"])
        if hits == 0:
            print("Sorry, the course was not found in the list")
            continue
        if hits > 1:
            print(
                "These courses satisfy your criteria, please narrow it down:")
            print("\n".join(
                "[{}] {}".format(index, title)
                for index, title in zip(found["indexes"], found["titles"])))
            continue

        # Exactly one match: show its keywords and enter the toggle loop.
        course_index = found["indexes"][0]
        course_title = found["titles"][0]
        attached = [
            keyword for keyword, matches in kw.items()
            if course_index in matches
        ]
        print("[{}] {}: {}".format(course_index, course_title, str(attached)))
        print("(Re-enter a keyword to have it removed)")
        while True:
            entry = format_text(input())
            if entry == "":
                break
            bucket = kw.setdefault(entry, [])
            if course_index in bucket:
                bucket.remove(course_index)
            else:
                bucket.append(course_index)

    keywords.write(kw)
Beispiel #15
0
def callback(bot: telegram.Bot, update: telegram.Update):
    """Re-send a text message through the bot and delete the original.

    The text produced by ``format_text`` from the sender and the message
    text is sent to the same chat with the reply markup. Afterwards a
    ``Message`` row is created and the user's message is deleted; any
    exception in that step is logged.
    """
    if settings.DEBUG:
        logger.debug('update: %s', update)

    markup = get_reply_markup()
    body = format_text(update.message.from_user, update.message.text)
    sent = bot.send_message(chat_id=update.message.chat_id,
                            reply_markup=markup,
                            text=body)

    try:
        with db:
            Message.create(message_tg_id=sent.message_id,
                           chat_tg_id=update.message.chat_id,
                           user_tg_id=update.message.from_user.id)
        bot.delete_message(update.message.chat_id, update.message.message_id)
    except Exception:
        logger.exception('exception while adding text message %s', update)
Beispiel #16
0
def get_translation(nmt_outputs, src_seq_length, sent_id, tgt_eos,
                    subword_option):
    """Pick one sentence from batched decoder output and format it as text.

    Row ``sent_id`` of ``nmt_outputs`` is converted to a list, truncated to
    ``src_seq_length[sent_id]`` entries and, when the UTF-8 encoded
    ``tgt_eos`` occurs in it, cut just before the first occurrence. The
    remaining tokens are formatted with the ``utils`` helper selected by
    ``subword_option`` (``"bpe"``, ``"spm"`` or anything else).
    """
    eos = tgt_eos.encode("utf-8") if tgt_eos else tgt_eos

    tokens = nmt_outputs[sent_id, :].tolist()
    # Never emit more tokens than the source sentence had.
    max_len = src_seq_length[sent_id]
    tokens = tokens[:max_len]

    if eos and eos in tokens:
        tokens = tokens[:tokens.index(eos)]

    if subword_option == "bpe":
        return utils.format_bpe_text(tokens)
    if subword_option == "spm":
        return utils.format_spm_text(tokens)
    return utils.format_text(tokens)
Beispiel #17
0
def get_settings_keyboard(settings, lang: str = 'ru'):
    """Build the inline settings keyboard with one toggle button per site.

    ``settings[1]``, ``settings[2]`` and ``settings[3]`` select the state of
    the robota, work and dou buttons. A value that is exactly ``True``
    yields the "active" label and callback status, anything else the
    "negative" ones.
    """
    keyboard = InlineKeyboardMarkup(row_width=1)

    # (index into settings, callback param, active label, negative label)
    toggles = (
        (1, "robota", 'btn settings robota active',
         'btn settings robota negative'),
        (2, "work", 'btn settings work active',
         'btn settings work negative'),
        (3, "dou", 'btn settings dou active',
         'btn settings dou negative'),
    )
    for position, param, active_key, negative_key in toggles:
        if settings[position] is True:
            label_key, status = active_key, "active"
        else:
            label_key, status = negative_key, "negative"
        keyboard.insert(
            InlineKeyboardButton(
                text=format_text(label_key, lang),
                callback_data=settings_callback.new(param=param,
                                                    status=status)))

    return keyboard
Beispiel #18
0
async def searching_call(call: CallbackQuery, state: FSMContext):
    """Handle the keyword keyboard callbacks: start, retype and cancel.

    ``start`` runs the search on every site enabled in the user's settings
    (``settings[1]`` rabota.ua, ``settings[2]`` work.ua, ``settings[3]``
    dou.ua), stores the found vacancies, shows the first vacancy per site
    and records every sent message id in the state's ``delete_msg`` list.
    ``retype`` returns to the keyword state. ``cancel`` finishes the state
    and deletes the recorded messages on a best-effort basis.
    """
    await call.answer(cache_time=3)
    user = await get_user(call.message.chat.id)
    if call.data == "start":
        if user is None:
            await call.message.answer(format_text('msg press start before'),
                                      reply_markup=get_main_keyboard(True))
            await state.finish()
        else:
            delete_messages = []  # ids of bot messages to clean up later
            empty_sites = []  # enabled sites that returned no jobs
            settings_count = []  # enabled sites that were searched
            data = await state.get_data()
            keyword_name = data.get('keyword')
            keyword = await add_keyword(keyword_name)
            keyword_id = keyword[0]
            await call.message.delete()
            search_msg = await call.message.answer(
                format_text('msg start searching'))
            delete_messages.append(search_msg.message_id)
            settings = await get_settings(user[0])
            # --- rabota.ua ---
            if settings[1] is True:
                site = "rabota.ua"
                settings_count.append('robota')
                robota_search_msg = await call.message.answer(
                    format_text('msg start searching robota'))
                delete_messages.append(robota_search_msg.message_id)
                jobs, errors = await parse_jobs(rabota, keyword_name)
                if len(jobs) == 0:
                    robota_empty_msg = await call.message.answer(
                        format_text('error no jobs'),
                        reply_markup=get_cancel_keyboard())
                    delete_messages.append(robota_empty_msg.message_id)
                    empty_sites.append('robota')
                else:
                    for j in jobs:
                        vacancy = await add_vacancy(j)
                        await add_vacancy_keyword_association(
                            vacancy[0], keyword_id, j['site'])
                    result_msg_rabota = await call.message.answer(
                        text=format_text("msg search result rabota"),
                        reply_markup=get_cancel_keyboard())
                    delete_messages.append(result_msg_rabota.message_id)
                    first_vacancy_id = await select_range_vacancies(
                        keyword_id, 1, site)
                    first_vacancy = await get_vacancy(id=first_vacancy_id,
                                                      site=site)
                    max_jobs = await count_keyword_vacancies(keyword_id, site)
                    show_msg_rabota = await call.message.answer(
                        text=await show_vacancy(first_vacancy),
                        reply_markup=get_vacancies_pages(
                            max_jobs, keyword_id, site),
                        disable_web_page_preview=True)
                    delete_messages.append(show_msg_rabota.message_id)
            # --- work.ua (status messages are sent without a keyboard) ---
            if settings[2] is True:
                site = "work.ua"
                settings_count.append('work')
                work_search_msg = await call.message.answer(
                    format_text('msg start searching work'))
                delete_messages.append(work_search_msg.message_id)
                jobs, errors = await parse_jobs(work, keyword_name)
                if len(jobs) == 0:
                    work_empty_msg = await call.message.answer(
                        format_text('error no jobs'))
                    delete_messages.append(work_empty_msg.message_id)
                    empty_sites.append('work')
                else:
                    for j in jobs:
                        vacancy = await add_vacancy(j)
                        await add_vacancy_keyword_association(
                            vacancy[0], keyword_id, j['site'])
                    result_msg_work = await call.message.answer(
                        text=format_text("msg search result work"))
                    delete_messages.append(result_msg_work.message_id)
                    first_vacancy_id = await select_range_vacancies(
                        keyword_id, 1, site)
                    first_vacancy = await get_vacancy(id=first_vacancy_id,
                                                      site=site)
                    max_jobs = await count_keyword_vacancies(keyword_id, site)
                    show_msg_work = await call.message.answer(
                        text=await show_vacancy(first_vacancy),
                        reply_markup=get_vacancies_pages(
                            max_jobs, keyword_id, site),
                        disable_web_page_preview=True)
                    delete_messages.append(show_msg_work.message_id)
            # --- dou.ua ---
            if settings[3] is True:
                site = "dou.ua"
                settings_count.append('dou')
                work_search_msg = await call.message.answer(
                    format_text('msg start searching dou'))
                delete_messages.append(work_search_msg.message_id)
                jobs, errors = await parse_jobs(dou, keyword_name)
                if len(jobs) == 0:
                    dou_empty_msg = await call.message.answer(
                        format_text('error no jobs'))
                    delete_messages.append(dou_empty_msg.message_id)
                    empty_sites.append('dou')
                else:
                    for j in jobs:
                        vacancy = await add_vacancy(j)
                        await add_vacancy_keyword_association(
                            vacancy[0], keyword_id, j['site'])
                    result_msg_work = await call.message.answer(
                        text=format_text("msg search result dou"),
                        reply_markup=get_cancel_keyboard())
                    delete_messages.append(result_msg_work.message_id)
                    first_vacancy_id = await select_range_vacancies(
                        keyword_id, 1, site)
                    first_vacancy = await get_vacancy(id=first_vacancy_id,
                                                      site=site)
                    max_jobs = await count_keyword_vacancies(keyword_id, site)
                    show_msg_work = await call.message.answer(
                        text=await show_vacancy(first_vacancy),
                        reply_markup=get_vacancies_pages(
                            max_jobs, keyword_id, site),
                        disable_web_page_preview=True)
                    delete_messages.append(show_msg_work.message_id)
            # Persist what was sent so a later cancel can delete it.
            async with state.proxy() as data:
                data["keyword_id"] = keyword_id
                for msg in delete_messages:
                    data['delete_msg'].append(msg)
            if settings[1] is False and settings[2] is False and settings[
                    3] is False:
                # No site enabled at all.
                await call.message.answer(format_text('error settings'),
                                          reply_markup=get_main_keyboard(
                                              user[5]))
                await state.finish()
            else:
                if len(settings_count) == len(empty_sites):
                    # Every enabled site came back empty.
                    await cancel(call.message, state)
                    await call.message.answer(format_text('error no all jobs'))
                else:
                    await add_search(user[0], keyword_id, datetime.now())
                    await Search.next()
                    await off_user_search(user_id=user[0])
    elif call.data == "retype":
        await Search.keyword.set()
        await call.message.delete()
        msg = await call.message.answer(format_text('msg get new keyword'))
        async with state.proxy() as data:
            data['delete_msg'].append(msg.message_id)
    elif call.data == "cancel":
        data = await state.get_data()
        # A missing "delete_msg" entry simply means nothing to clean up.
        delete_msg = data.get("delete_msg") or []
        await state.finish()
        for msg in delete_msg:
            try:
                await bot.delete_message(call.message.chat.id, msg)
            except Exception:
                # Deletion stays best-effort, but unlike a bare ``except``
                # this lets asyncio.CancelledError propagate.
                continue
        # NOTE(review): ``user`` is only checked for None in the "start"
        # branch; confirm it cannot be None here.
        await call.message.answer(format_text('msg cancel search'),
                                  reply_markup=get_main_keyboard(user[5]))
Beispiel #19
0
async def no_keyword_action(message: types.Message, state: FSMContext):
    """Answer with the 'msg no keyword action' text and track both messages.

    The ids of the user's message and of the bot's reply are appended to
    ``delete_msg`` in the state data.
    """
    reply = await message.answer(format_text("msg no keyword action"))
    async with state.proxy() as data:
        for message_id in (message.message_id, reply.message_id):
            data['delete_msg'].append(message_id)
Beispiel #20
0
 def _prepare_query_test(self):
     """Lower-case ``self.query_text`` and run it through ``utils.format_text``.

     The formatter also receives ``self.indexBuilder.stop_words_file``.
     """
     lowered = self.query_text.lower()
     # Stored first so the attribute is lower-cased even if formatting fails.
     self.query_text = lowered
     stop_words = self.indexBuilder.stop_words_file
     self.query_text = utils.format_text(lowered, stop_words)
Beispiel #21
0
    def readPage(url, start=0):
        """Scrape one page of posts, then recurse into the next page.

        Fetches ``url + str(start)``, walks every ``div.post`` whose id is
        not in ``ignore``, matches the post text against the course
        keywords and records source URL and date in ``outputJSON`` for
        posts that match exactly one course. Bookkeeping values
        (``lastIndex``, ``curr_lastID``, ``postIndex``) are updated in the
        surrounding ``data`` dict. Recursion stops on the page that has no
        "next" pagination link.

        Args:
            url: Base URL; the post offset is appended to it.
            start: Offset of the first post on the page.
        """
        f = urlopen(url + str(start)).read()
        html = pq(f)
        activeEl = html(".action-bar.top .pagination li.active span")
        if len(activeEl) == 0:
            curr = 0
        else:
            curr = int(activeEl.text())
        isLast = len(html(".action-bar.top .pagination li.next")) == 0
        posts = html("div.post")
        for p in posts:
            if p.attrib["id"] in ignore:
                continue
            d = {}
            pobj = pq(p)

            # get content
            content = pobj("div.content")
            d["plain"] = content.text()
            d["html"] = content.html()

            # get corresponding course
            query = d["plain"]
            res = keywords.match(format_text(query))
            if len(res["titles"]) == 1:
                result = res["titles"][0]
                ind = str(res["indexes"][0])
            else:
                # Zero or several candidate courses: the post is recorded as
                # unmatched. (An interactive retry prompt used to follow an
                # unconditional ``break`` here and could never run.)
                result = "no result"
                ind = ""
            d["course"] = result

            # get source url
            d["source"] = url + str(start) + "#" + p.attrib["id"]
            if p.attrib["id"] == data["lastID"]:
                # store index when lastID post occurs
                data["lastIndex"] = data["postIndex"]
            data["curr_lastID"] = p.attrib["id"]

            # get date: the text after "» </span>" minus its last character
            date_str = pobj("p.author").html()
            i = date_str.index("» </span>") + 9
            date_str = greekToEngDate(date_str[i:-1])
            date_obj = datetime.strptime(date_str, "%a %b %d, %Y %I:%M %p")
            d["date"] = date_obj.strftime("%d/%m/%Y")
            # Only the first post seen for a course index is kept.
            if ind != "" and ind not in outputJSON:
                data["postIndex"] += 1
                outputJSON[ind] = {"source": d["source"], "date": d["date"]}
        if not isLast:
            # The offset passed on is the active page number times 20.
            readPage(url, curr * 20)
Beispiel #22
0
 def get_file_content(self):
     """Load, lower-case and format every file in ``self.file_names``.

     Each file is read from ``self.directory_path``, lower-cased and passed
     to ``utils.format_text`` together with ``self.stop_words_file``; the
     result is stored in ``self.files_content`` under the file name.
     """
     for file_name in self.file_names:
         path = self.directory_path + "/" + file_name
         # The context manager closes the handle right after reading instead
         # of leaving it open until garbage collection.
         with open(path, 'r') as handle:
             text = handle.read().lower()
         self.files_content[file_name] = utils.format_text(
             text, self.stop_words_file)
Beispiel #23
0
def parse():
    """Parse the schedule PDF and write the date found for each course.

    Opens ``schedule/<cfg schedule_file>``, lays out every page, collects
    text boxes, rectangles, dates and hour labels into the shared ``data``
    dict, matches each text box against the course keywords and finally
    passes a ``{course index: date}`` mapping to ``write``. Depending on
    ``CREATE_SVG`` / ``LOG_TEXTS`` a debug SVG page and a text log are
    written under ``outputs/<cfg folder>/``.

    Raises:
        PDFTextExtractionNotAllowed: If the document is not extractable.
    """
    with open("schedule/{}".format(cfg.get("schedule_file")), "rb") as fp:
        parser = PDFParser(fp)
        document = PDFDocument(parser)
        if not document.is_extractable:
            raise PDFTextExtractionNotAllowed
        rsrcmgr = PDFResourceManager()
        # NOTE(review): this device is rebound two lines below before it is
        # used -- confirm the PDFDevice construction is not needed.
        device = PDFDevice(rsrcmgr)
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        def parse_obj(lt_objs):
            # Collect text boxes, dates, hour labels and rectangles from the
            # layout objects into ``data``; recurses into figures.
            for obj in lt_objs:
                if isinstance(obj, LTTextBoxHorizontal):
                    coor = getTextCoords(obj.bbox[0:2])
                    text = obj.get_text().replace('\n', ' ')
                    # check if content contains a date
                    match = re.search(r"\d{2}/\d{2}/\d{4}", text)
                    if match:
                        data["dates"].append({
                            "date": match.group(),
                            "coords": coor
                        })
                    # every H:MM / HH:MM occurrence, left-padded with zeros
                    # to five characters
                    match = re.findall(r"\d{1,2}:\d{2}", text)
                    if match:
                        data["hours"].append({
                            "hours":
                            list(map(lambda x: "{0:0>5}".format(x), match)),
                            "coords":
                            coor
                        })
                    # third slot is filled later with the match annotation
                    data["textboxes"].append([coor, text, ""])

                if isinstance(obj, LTRect):
                    data["rects"].append(getRectCoords(obj.bbox[0:4]))

                if isinstance(obj, LTFigure):
                    parse_obj(obj._objs)

        if LOG_TEXTS:
            # Truncate the text log; pages append to it further down.
            with open("outputs/" + cfg.get("folder") + "/pdf_texts.txt",
                      "w",
                      encoding="utf8") as log:
                log.write("")

        # The SVG file is opened (and truncated) even when CREATE_SVG is off.
        with open("outputs/" + cfg.get("folder") + "/pdf_svg.html",
                  "w",
                  encoding="utf8") as svg:
            ''' SVG HEAD '''
            if CREATE_SVG:
                svg.write(
                    "<style type=\"text/css\">svg{stroke:#000;stroke-width:1;fill:none}</style>\n"
                )
            i = 0  # page counter, used as the SVG element id suffix

            # loop over all pages in the document
            for page in PDFPage.create_pages(document):
                # read the page into a layout object
                interpreter.process_page(page)
                layout = device.get_result()
                ''' CREATE SVG '''
                if CREATE_SVG:
                    svg.write(
                        "<svg id=\"s{}\" width=\"1200\" height=\"600\">\n".
                        format(i))

                # Per-page collections are reset; data["courses"] is not and
                # accumulates across pages.
                data["rects"] = []
                data["textboxes"] = []
                data["dates"] = []
                data["datelines"] = []
                data["hours"] = []

                # extract info from this page
                parse_obj(layout._objs)

                lines = rectsToLines(data["rects"])

                lines = mergeLines(lines)
                # Two stable sorts: final order is by x[0][1], ties broken
                # by x[1][1].
                lines.sort(key=lambda x: x[1][1])
                lines.sort(key=lambda x: x[0][1])

                grid = createGrid(lines)
                data["textboxes"] = mergeTexts(grid, data["textboxes"])
                data["textboxes"] = splitSimultaneousCourses(data["textboxes"])

                data["hours"].sort(key=lambda x: x["coords"][1])

                if data["hours"]:
                    calcHourBoundaries(grid)
                if data["dates"]:
                    calcDateBoundaries(grid)

                # keyword matching for each textbox
                for t in data["textboxes"]:
                    # collapse runs of whitespace to single spaces
                    t[1] = " ".join(t[1].split())
                    res = keywords.match(format_text(t[1]))
                    # only unambiguous matches are recorded
                    if len(res["indexes"]) == 1:
                        data["courses"][res["indexes"][0]] = {
                            "coords": t[0],
                            "date": getDate(t[0])
                        }
                        t[2] = " (match: {})".format(res["titles"][0])
                ''' DRAW LINES '''
                if CREATE_SVG:
                    # horizontal extent of all lines, used for the datelines
                    minX, maxX = 1e10, 0
                    for l in lines:
                        svg.write(
                            "<line x1=\"{}\" y1=\"{}\" x2=\"{}\" y2=\"{}\" stroke=\"#{}\"></line>\n"
                            .format(l[0][0], l[0][1], l[1][0], l[1][1],
                                    randomColor()))
                        if l[0][0] < minX:
                            minX = l[0][0]
                        if l[1][0] > maxX:
                            maxX = l[1][0]
                    if SHOW_DATELINES:
                        for h in data["hours"]:
                            svg.write(
                                "<circle cx=\"{}\" cy=\"{}\" r=\"1\" stroke=\"red\"></circle>\n"
                                .format(h["coords"][0], h["coords"][1]))
                        for d in data["dates"]:
                            # presumably "boundaries" is set by
                            # calcDateBoundaries -- verify
                            if d["boundaries"][0] != 0 and d["boundaries"][
                                    1] != 0:
                                svg.write(
                                    "<line x1=\"{}\" y1=\"{}\" x2=\"{}\" y2=\"{}\" stroke=\"#111111\"></line>\n"
                                    .format(minX, d["boundaries"][0], maxX,
                                            d["boundaries"][0]))
                                svg.write(
                                    "<line x1=\"{}\" y1=\"{}\" x2=\"{}\" y2=\"{}\" stroke=\"#111111\"></line>\n"
                                    .format(minX, d["boundaries"][1], maxX,
                                            d["boundaries"][1]))
                    if SHOW_TEXTBOXES:
                        for t in data["textboxes"]:
                            # only the first five characters are drawn
                            svg.write(
                                "<text x=\"{}\" y=\"{}\" font-size=\"4\" font-weight=\"lighter\">{}</text>\n"
                                .format(t[0][0], t[0][1], t[1][:5]))
                if LOG_TEXTS:
                    with open("outputs/" + cfg.get("folder") +
                              "/pdf_texts.txt",
                              "a",
                              encoding="utf8") as log:
                        for t in data["textboxes"]:
                            log.write("{}, {}, {}{}\n".format(
                                t[0][0], t[0][1], t[1], t[2]))
                ''' CLOSE SVG '''
                if CREATE_SVG:
                    svg.write('</svg>' + "\n")
                i += 1

        # Reduce the collected course entries to {course index: date}.
        coursedates = {}
        for key, c in data["courses"].items():
            coursedates[key] = c["date"]
        write(coursedates)
Beispiel #24
0
def extract_details():
    """Scrape every shopping page returned by ``read_link()`` and save the results.

    For each link the page is rendered with ``dynamic_html``, parsed with
    ``init_parser`` and a ``details`` dict is filled from the page. Each dict
    is appended to the ``extracted_info`` list defined outside this function,
    and the whole list is handed to ``save_to_json`` once all links are done.

    When ``dynamic_html`` returns a falsy result for a link, an empty
    ``details`` dict is recorded for that link and the loop moves on.

    Raises:
        AttributeError: if an expected element is missing from a page
            (``find`` returns ``None``).
        IndexError: if the consumer-profile section has fewer than four
            ``<p>`` entries.
    """
    # Read the link list once instead of once for the count and once for the loop.
    shopping_pages = read_link()
    print(f"{len(shopping_pages)} links achados")

    # enumerate keeps the progress counter in step with the link position,
    # including after a skipped page.
    for contador, shopping_page in enumerate(shopping_pages, start=1):
        print(f'Extraindo {contador} link')

        details = {}

        # Render the page (initialise the drivers).
        dynamic_result = dynamic_html(shopping_page)

        # The original compared the *function* ``dynamic_html`` to False, which
        # is never true, so this branch was unreachable. Test the result.
        if not dynamic_result:
            extracted_info.append(details)
            # NOTE(review): this saves the empty per-page dict, not the
            # accumulated list -- kept as written; confirm against save_to_json.
            save_to_json(details)
            continue

        crawler = init_parser(dynamic_result)

        details['Nome'] = crawler.find('span', class_="post post-shopping current-item").text
        details['Tipo'] = crawler.find('a', class_="taxonomy operacao").text
        details['link'] = shopping_page

        details_container = crawler.find('div', class_="specs")

        # Consumer profile: the <div> after the heading holds one <p> per class.
        perfil_title = details_container.find(text="PERFIL DE CONSUMIDORES")
        class_content = perfil_title.findNext('div')
        class_perfil = [p.text for p in class_content.find_all('p')]

        details['Classe A'] = class_perfil[0]
        details['Classe B'] = class_perfil[1]
        details['Classe C'] = class_perfil[2]
        details['Classe D'] = class_perfil[3]

        # Entertainment: keyed by the heading text found on the page.
        enterteiment_title = details_container.find(text="ENTRETENIMENTO")
        enterteiment_content = enterteiment_title.findNext('div')
        details[enterteiment_title] = format_text(enterteiment_content.text)

        # Total land area.
        area_title = details_container.find(text="ÁREA TOTAL DO TERRENO")
        area_content = area_title.findNext('div')
        details[area_title] = format_text(area_content.text)

        # Contact: the content lives in the <ul> after the heading.
        contact_title = details_container.find(text="CONTATO")
        contact_content = contact_title.findNext('ul')
        details[contact_title] = format_text(contact_content.text)

        # Icon boxes: each box contributes one title -> number pair.
        aditional_info = crawler.find('div', class_="icons shoppings mt-4 mb-4")
        for box_info in aditional_info.find_all('div', class_="box"):
            title = box_info.find('p', class_='mb-0')
            detail_content = box_info.find('p', class_="number")
            details[title.text] = detail_content.text

        extracted_info.append(details)

    print('Finalizado!')

    print('Salvando em json...')

    save_to_json(extracted_info)
    print('Finalizado...')
Beispiel #25
0
def do_next_word(word: str):
    """Build the dictionary entry for ``word`` and merge it into the JSON file.

    The first row from ``db.get_word_details(word)`` supplies the translation,
    part of speech and base form. Depending on the part-of-speech code, extra
    inflection tables are attached to the entry:

    * ``0`` -- rows from ``db.get_noun_declination``, plus the gender field;
    * ``1`` -- rows from ``db.get_conjugation``;
    * ``2`` -- rows from ``db.get_adjective_declination``.

    The entry is printed, merged into the contents of
    ``data/lemko_words.json`` under its base form, and the file is rewritten.
    """
    record = db.get_word_details(word)[0]
    part_of_speech = record['grammatical_part_of_speech']

    entry = {
        "tlumaczenie": utils.format_text(record['polish_translation']),
        "czesc_mowy": part_of_speech,
    }
    base_form = record['base_form']
    if part_of_speech == 0:
        entry['rodzaj'] = record['grammatical_gender']
    node = {base_form: entry}

    declension = {}
    conjugation = {}

    if part_of_speech == 0:
        # Noun: number -> case -> inflected form.
        by_number = {0: {}, 1: {}}
        for row in db.get_noun_declination(word):
            form = row['word']
            by_number[row['grammatical_number']][row['grammatical_case']] = form
        declension = {"deklinacja": by_number}

    if part_of_speech == 1:
        # Verb: tense -> person -> inflected form.
        by_tense = {0: {}, 1: {}, 2: {}}
        for row in db.get_conjugation(word):
            form = row['word']
            by_tense[row['grammatical_tense']][row['grammatical_person']] = form
        conjugation = {"koniugacja": by_tense}

    if part_of_speech == 2:
        # Adjective: gender -> degree -> number -> case -> inflected form.
        gender_names = {0: 'meski', 1: 'zenski', 2: 'nijaki'}
        degree_names = {0: 'rowny', 1: 'wyzszy', 2: 'najwyzszy'}
        table = {
            gender: {
                degree: {0: {}, 1: {}}
                for degree in ('rowny', 'wyzszy', 'najwyzszy')
            }
            for gender in ('meski', 'zenski', 'nijaki')
        }
        for row in db.get_adjective_declination(word):
            # Unknown codes fall back to neuter / positive degree.
            gender = gender_names.get(row['grammatical_gender'], 'nijaki')
            degree = degree_names.get(row['grammatical_comparison'], 'rowny')
            form = row['word']
            cases = table[gender][degree][row['grammatical_number']]
            cases[row['grammatical_case']] = form
        declension = {"deklinacja": table}

    entry.update(declension)
    entry.update(conjugation)

    print(node)
    with open("data/lemko_words.json", "r", encoding="utf8") as source:
        lemko_dict = json.load(source)
    lemko_dict.update(node)

    serialized = json.dumps(lemko_dict,
                            indent=2,
                            sort_keys=True,
                            ensure_ascii=False)
    # Written as bytes so the UTF-8 output is not subject to newline translation.
    with open('data/lemko_words.json', 'wb') as target:
        target.write(serialized.encode('utf8'))
Beispiel #26
0
def get_cancel_keyboard(lang: str = 'ru'):
    """Return a resizable reply keyboard with one cancel button.

    The button label is the ``'btn_cancel'`` text produced by ``format_text``
    for ``lang``.
    """
    cancel_button = KeyboardButton(text=format_text('btn_cancel', lang))
    single_row_layout = [[cancel_button]]
    return ReplyKeyboardMarkup(resize_keyboard=True,
                               keyboard=single_row_layout)