Esempio n. 1
0
def findContests(spbuSite: str) -> List[str]:
    """Return the ``href`` of every link on the page at ``spbuSite``.

    Anchors without an ``href`` attribute are ignored. The first collected
    link is dropped because it points back to the previous page.

    Returns an empty list when the page has no usable links (the earlier
    ``del contests[0]`` raised ``IndexError`` in that case).
    """
    hrefs = [
        a["href"]
        for a in makeSoup(spbuSite).find_all("a")
        if a.has_attr("href")
    ]

    # Skip the leading link to the previous page; slicing is a no-op on an
    # empty list, so a page without links no longer crashes.
    return hrefs[1:]
Esempio n. 2
0
def findDepartmentToLink(link: str) -> Tuple[Dict[str, str], int]:
    """Map each department named on the page at ``link`` to its absolute URL.

    Only anchors that carry an ``href`` and sit inside a ``<tbody>`` of a
    table with the ``ranepa-table`` class are considered. The anchor's
    visible text is the key and ``RANEPA_ROOT + href`` is the value; a
    repeated name overwrites the earlier entry. ``printDot()`` is called
    once per accepted anchor.

    Returns:
        A ``(mapping, count)`` tuple where ``count`` is ``len(mapping)``.
    """
    departmentLinks = {}  # {department: link}

    for table in makeSoup(link).find_all("table"):
        isRanepaTable = table.has_attr("class") and "ranepa-table" in table["class"]
        if not isRanepaTable:
            continue

        for body in table.find_all("tbody"):
            for anchor in body.find_all("a"):
                if not anchor.has_attr("href"):
                    continue

                name = visibleSoupToString(anchor)
                departmentLinks[name] = RANEPA_ROOT + anchor["href"]

                printDot()

    return departmentLinks, len(departmentLinks)
Esempio n. 3
0
    def addForm(department, speciality, link):
        """Record every study form linked from the page at ``link``.

        For each anchor with an ``href`` inside a ``<tbody>`` of a
        ``ranepa-table`` table, stores ``RANEPA_ROOT + href`` in
        ``forms[department][speciality][<anchor text>]``, increments
        ``formCount[0]`` and calls ``printDot()``. ``forms`` and
        ``formCount`` come from the enclosing scope.

        Any ordinary error is logged via ``logError`` together with the
        traceback instead of being raised, so one broken page does not stop
        the crawl. ``KeyboardInterrupt`` and ``SystemExit`` are deliberately
        NOT swallowed (the handler used to catch ``BaseException``).
        """
        try:
            for table in makeSoup(link).find_all("table"):
                if not (table.has_attr("class")
                        and "ranepa-table" in table["class"]):
                    continue

                for tbody in table.find_all("tbody"):
                    for a in tbody.find_all("a"):
                        if not a.has_attr("href"):
                            continue

                        form = visibleSoupToString(a)
                        forms[department][speciality][
                            form] = RANEPA_ROOT + a["href"]

                        formCount[0] += 1
                        printDot()
        except Exception:
            logError("Ошибка в форме %s: %s" % (link, traceback.format_exc()))
Esempio n. 4
0
def findAbits(abits: Dict[str, list], contestLink: str) -> None:
    """Collect the applicants of every contest on a page into ``abits``.

    The page at ``contestLink`` is fetched once. The department name is read
    from the ``<h3>`` element at index ``DEPARTMENT_NAME_IDX`` and passed, as
    shared data, to ``getAbitsFromContest`` for each contest that
    ``getContestsOnPage`` finds in the raw page text.

    The concatenated results are stored in ``abits[contestLink]``; nothing is
    returned. ``printDot()`` is called once when the page is done.
    """
    page = makeSoup(contestLink)

    departmentHeading = page.find_all("h3")[DEPARTMENT_NAME_IDX]
    shared = dict()
    shared[PROPERTY.DEPARTMENT] = visibleSoupToString(departmentHeading)

    rawText = soupToRawString(page)

    collected = list()
    for contest in getContestsOnPage(rawText):
        collected.extend(getAbitsFromContest(contest, shared))

    abits[contestLink] = collected
    printDot()
Esempio n. 5
0
    def addEducationalProgram(department, speciality, form, link):
        """Record every educational program linked from the page at ``link``.

        For each anchor with an ``href`` inside a ``<tbody>`` of a
        ``ranepa-table`` table, stores ``RANEPA_ROOT + href`` in
        ``educationalPrograms[department][speciality][form][<anchor text>]``,
        increments ``educationalProgramCount[0]`` and calls ``printDot()``.
        Both containers come from the enclosing scope.

        Any ordinary error is logged via ``logError`` together with the
        traceback instead of being raised. ``KeyboardInterrupt`` and
        ``SystemExit`` are deliberately NOT swallowed (the handler used to
        catch ``BaseException``).
        """
        try:
            for table in makeSoup(link).find_all("table"):
                if not (table.has_attr("class")
                        and "ranepa-table" in table["class"]):
                    continue

                for tbody in table.find_all("tbody"):
                    for a in tbody.find_all("a"):
                        if not a.has_attr("href"):
                            continue

                        educationalProgram = visibleSoupToString(a)
                        programs = educationalPrograms[department][speciality][
                            form]
                        programs[educationalProgram] = RANEPA_ROOT + a["href"]

                        educationalProgramCount[0] += 1
                        printDot()
        except Exception:
            logError("Ошибка в образовательной программе %s: %s" %
                     (link, traceback.format_exc()))
Esempio n. 6
0
    def addAbits(department, speciality, form, educationalProgram, link):
        """Parse the applicant lists on the page at ``link`` into ``links``.

        Every ``<section>`` whose ``id`` is ``list_budget`` or
        ``list_contract`` and which is not marked as empty is read row by
        row. Column positions are discovered from the ``<thead>`` captions;
        the columns between the total-score column and the
        individual-achievements column are treated as per-subject grades.
        The "original" flag is read from the last cell of each row.

        On success the parsed applicants are stored in ``links[link]``,
        ``abitCount[0]`` grows by their number and ``printDot()`` is called.
        ``links`` and ``abitCount`` come from the enclosing scope.

        Any ordinary error (including a header that lacks a required column)
        is logged via ``logError`` with the traceback and the page is
        skipped. ``KeyboardInterrupt`` and ``SystemExit`` are deliberately
        NOT swallowed (the handler used to catch ``BaseException``).
        """

        def sectionIsEmpty(section):
            # An empty list is rendered by the site as this exact cell markup.
            return 'style="text-align:center">Список пуст</td>' in soupToRawString(
                section)

        def floatOrNone(cell):
            # Cells whose text is not a number are stored as None.
            try:
                return float(visibleSoupToString(cell))
            except ValueError:
                return None

        try:
            abits = list()
            for section in makeSoup(link).find_all("section"):
                if sectionIsEmpty(section):
                    continue

                if not (section.has_attr("id") and section["id"] in (
                        "list_budget", "list_contract")):
                    continue

                headerCells = section.find("thead").find_all("th")

                fioIdx = None
                statusIdx = None
                sumIdx = None
                individualBonusIdx = None
                originalIdx = -1  # the flag lives in the last column

                for idx, th in enumerate(headerCells):
                    contents = visibleSoupToString(th)
                    if "ФИО" in contents:
                        fioIdx = idx
                    elif "Статус" in contents:
                        statusIdx = idx
                    elif "Сумма конкурсных баллов" in contents:
                        sumIdx = idx
                    elif "Сумма баллов по индивидуальным достижениям" in contents:
                        individualBonusIdx = idx

                # Explicit raise instead of ``assert`` so the check still
                # runs under ``python -O``; it is caught and logged below.
                if None in (fioIdx, statusIdx, sumIdx, individualBonusIdx):
                    raise AssertionError(
                        "required column is missing in the table header")

                subjectsBeginIdx = sumIdx + 1
                subjects = [
                    visibleSoupToString(th)
                    for th in headerCells[subjectsBeginIdx:individualBonusIdx]
                ]

                for tr in section.find("tbody").find_all("tr"):
                    abit = dict()

                    abit[PROPERTY.DEPARTMENT] = department
                    abit[PROPERTY.SPECIALITY] = speciality
                    abit[PROPERTY.EDU_PROG] = educationalProgram
                    abit[PROPERTY.ONLY_IN_WALLS] = form
                    abit[PROPERTY.FOR_MONEY] = section["id"] == "list_contract"

                    tds = tr.find_all("td")

                    abit[PROPERTY.ABIT_NAME] = visibleSoupToString(tds[fioIdx])
                    abit[PROPERTY.CONTEST_TYPE] = visibleSoupToString(
                        tds[statusIdx])
                    abit[PROPERTY.GRADES] = dict()
                    abit[PROPERTY.SUM] = floatOrNone(tds[sumIdx])
                    for offset, subj in enumerate(subjects):
                        abit[PROPERTY.GRADES][subj] = floatOrNone(
                            tds[subjectsBeginIdx + offset])
                    # Unlike the sum and the grades, a non-numeric bonus is
                    # not tolerated: the ValueError reaches the handler below
                    # and the whole page is skipped.
                    abit[PROPERTY.EXTRA_BONUS] = float(
                        visibleSoupToString(tds[individualBonusIdx]))
                    abit[PROPERTY.ORIGINAL] = visibleSoupToString(
                        tds[originalIdx]) == "Оригинал"

                    abits.append(abit)

            abitCount[0] += len(abits)
            links[link] = abits
            printDot()
        except Exception:
            logError("Ошибка в списке %s: %s" % (link, traceback.format_exc()))
Esempio n. 7
0
def extractList(contestLists: Dict[str, dict], contestPage: str) -> None:
    """Parse one contest page and store its applicants in ``contestLists``.

    The page is processed in three steps:

    1. The raw markup is split on ``<br/>``; pieces that do not contain
       ``table`` are scanned for list-wide properties (educational program,
       speciality, study form, funding basis) and for entrance-exam
       subject names (pieces containing ``"ВИ "``).
    2. Column positions are discovered from the ``<th>`` cells of the first
       ``<tr>``.
    3. Every following ``<tr>`` becomes one applicant dict that starts as a
       copy of the list-wide properties.

    The result is written to ``contestLists[contestPage]`` and ``printDot()``
    is called. Nothing happens when ``makeSoup`` returns ``None``.

    Raises:
        AssertionError: if the header lacks the name, contest type,
            extra-bonus or "original" column.
    """
    soup = makeSoup(contestPage)

    if soup is None:
        return

    listProperties = dict()
    subjects = list()

    # ``chunk`` rather than ``property`` to avoid shadowing the builtin.
    for chunk in soupToRawString(soup).split("<br/>"):
        if "table" in chunk:
            continue

        if "Образовательная программа:" in chunk:
            listProperties[PROPERTY.EDU_PROG] = getValue(chunk)
        elif "Направление:" in chunk:
            listProperties[PROPERTY.SPECIALITY] = getValue(chunk)
        elif "Форма обучения:" in chunk:
            listProperties[PROPERTY.ONLY_IN_WALLS] = getValue(chunk)
        elif "Основа обучения:" in chunk:
            listProperties[PROPERTY.FOR_MONEY] = getValue(chunk)
        elif "ВИ " in chunk:
            # The subject name is whatever follows the last colon.
            subject = chunk[chunk.rfind(":") + 1:].replace("</b>", "").strip()
            subjects.append(subject)

    # Required columns: must be found in the header.
    nameColIdx = None
    contestTypeColIdx = None
    extraBonusColIdx = None
    originalColIdx = None
    # Optional columns: simply skipped when absent.
    birthdayColIdx = None  # absent for master's programs
    sumColIdx = None  # absent for master's programs
    sumExamColIdx = None  # absent for master's programs
    # There may be no entrance exams at all, e.g. on an empty page.
    firstGradeColIdx = None

    for i, th in enumerate(soup.find("tr").find_all("th")):
        text = visibleSoupToString(th)
        if "Фамилия Имя Отчество" in text or "ФИО" in text:
            nameColIdx = i
        elif "Дата рождения" in text:
            birthdayColIdx = i
        elif "Тип конкурса" in text:
            contestTypeColIdx = i
        elif "Σ общ" in text:
            sumColIdx = i
        elif "Σ ЕГЭ" in text:
            sumExamColIdx = i
        elif firstGradeColIdx is None and "ВИ " in text:
            # Only the first exam column is remembered; the remaining ones
            # are addressed by offset from it.
            firstGradeColIdx = i
        elif "Σ ИД" in text:
            extraBonusColIdx = i
        elif "Оригинал" in text:
            originalColIdx = i

    # Explicit raise instead of ``assert`` so the check is not stripped
    # under ``python -O``; the exception type is kept for existing callers.
    if None in (nameColIdx, contestTypeColIdx, extraBonusColIdx, originalColIdx):
        raise AssertionError(
            "required column is missing in the table header of %s" % contestPage)

    abits = []

    for tr in soup.find_all("tr")[1:]:
        tds = tr.find_all("td")

        abit = dict(listProperties)
        abit[PROPERTY.ABIT_NAME] = visibleSoupToString(tds[nameColIdx])
        if birthdayColIdx is not None:
            abit[PROPERTY.BIRTHDAY] = visibleSoupToString(tds[birthdayColIdx])
        abit[PROPERTY.CONTEST_TYPE] = visibleSoupToString(tds[contestTypeColIdx])
        if sumColIdx is not None:
            abit[PROPERTY.SUM] = getFloatGrade(tds[sumColIdx])
        if sumExamColIdx is not None:
            abit[PROPERTY.SUM_EXAM] = getFloatGrade(tds[sumExamColIdx])
        if firstGradeColIdx is None:
            abit[PROPERTY.GRADES] = None
        else:
            abit[PROPERTY.GRADES] = dict()
            for offset, subject in enumerate(subjects):
                abit[PROPERTY.GRADES][subject] = getFloatGrade(
                    tds[firstGradeColIdx + offset])
        abit[PROPERTY.EXTRA_BONUS] = getFloatGrade(tds[extraBonusColIdx])
        abit[PROPERTY.ORIGINAL] = visibleSoupToString(tds[originalColIdx]) == "Да"

        abits.append(abit)

    contestLists[contestPage] = abits
    printDot()