Exemple #1
0
def findDepartmentToSpecialityToFormToLink(specialities: Dict[str, Dict[str, str]]) -> \
        Tuple[Dict[str, Dict[str, Dict[str, str]]], int]:
    """Build the department -> speciality -> form -> link mapping.

    For every (department, speciality, link) found in ``specialities`` one
    task is put on ``taskQueue``. The task loads the page at ``link`` with
    ``makeSoup`` and records every anchor that has an ``href`` and sits inside
    a ``tbody`` of a table carrying the ``ranepa-table`` class: the anchor's
    visible text becomes the form name, ``RANEPA_ROOT + href`` the stored link.

    Returns the nested mapping and the number of anchors recorded (counted per
    anchor, so a repeated form name is counted each time). The function
    returns only after ``taskQueue.join()`` does. Anything raised inside a
    task is reported through ``logError`` and not propagated.
    """
    result = {}  # {department: {speciality: {form: link}}}
    recorded = [0]  # one-element list so the nested task can update it

    def addForm(department, speciality, link):
        try:
            for table in makeSoup(link).find_all("table"):
                # Only tables marked with the "ranepa-table" class are scanned.
                if not table.has_attr("class"):
                    continue
                if "ranepa-table" not in table["class"]:
                    continue

                for body in table.find_all("tbody"):
                    for anchor in body.find_all("a"):
                        if not anchor.has_attr("href"):
                            continue

                        formName = visibleSoupToString(anchor)
                        target = RANEPA_ROOT + anchor["href"]
                        result[department][speciality][formName] = target

                        recorded[0] += 1
                        printDot()
        except BaseException:
            details = traceback.format_exc()
            logError("Ошибка в форме %s: %s" % (link, details))

    # Create the empty slots up front so tasks only ever fill existing dicts.
    for department, perSpeciality in specialities.items():
        departmentSlot = result[department] = {}

        for speciality, link in perSpeciality.items():
            departmentSlot[speciality] = {}
            taskQueue.put((addForm, (department, speciality, link)))

    taskQueue.join()

    return result, recorded[0]
Exemple #2
0
def findDepartmentToSpecialityToLink(
        departments: Dict[str, str]) -> Tuple[Dict[str, Dict[str, str]], int]:
    """Build the department -> speciality -> link mapping.

    One task per (department, link) pair of ``departments`` is put on
    ``taskQueue``. The task loads the page at ``link`` with ``makeSoup`` and
    records every anchor that has an ``href`` and sits inside a ``tbody`` of a
    table carrying the ``ranepa-table`` class: the anchor's visible text
    becomes the speciality name, ``RANEPA_ROOT + href`` the stored link.

    Returns the nested mapping and the number of anchors recorded (counted per
    anchor, so a repeated speciality name is counted each time). The function
    returns only after ``taskQueue.join()`` does. Anything raised inside a
    task is reported through ``logError`` and not propagated.
    """
    result = {}  # {department: {speciality: link}}
    recorded = [0]  # one-element list so the nested task can update it

    def addSpeciality(department, link):
        try:
            for table in makeSoup(link).find_all("table"):
                # Only tables marked with the "ranepa-table" class are scanned.
                if not table.has_attr("class"):
                    continue
                if "ranepa-table" not in table["class"]:
                    continue

                for body in table.find_all("tbody"):
                    for anchor in body.find_all("a"):
                        if not anchor.has_attr("href"):
                            continue

                        specialityName = visibleSoupToString(anchor)
                        target = RANEPA_ROOT + anchor["href"]
                        result[department][specialityName] = target

                        recorded[0] += 1
                        printDot()
        except BaseException:
            details = traceback.format_exc()
            logError("Ошибка в департаменте %s: %s" % (link, details))

    # Create the empty slot first so the task only fills an existing dict.
    for department, link in departments.items():
        result[department] = {}
        taskQueue.put((addSpeciality, (department, link)))

    taskQueue.join()

    return result, recorded[0]
Exemple #3
0
def findAbitsAsync(contestLinks: List[str]) -> Dict[str, List]:
    """Queue one ``findAbits`` task per contest link and return the shared result.

    Every task receives the same ``abits`` dict plus one link from
    ``contestLinks``; ``findAbits`` is presumably what fills the dict (its
    body is not visible here). The function returns the dict once
    ``taskQueue.join()`` has returned.
    """
    # typing.List accepts a single type parameter; the element type of the
    # per-link lists is not visible here, so it is left unparameterized to
    # agree with the declared return type.
    abits: Dict[str, List] = dict()

    for contestLink in contestLinks:
        taskQueue.put((findAbits, (abits, contestLink)))

    taskQueue.join()

    return abits
Exemple #4
0
def findContestListsAsync(contestLinks: Set[str]) -> dict:
    """Queue one ``extractList`` task per contest link and return the shared dict.

    Every task receives the same result dict plus one link from
    ``contestLinks``; ``extractList`` is presumably what fills the dict (its
    body is not visible here). The dict is returned once ``taskQueue.join()``
    has returned.
    """
    collected = {}  # contestPage: { abit }

    for url in contestLinks:
        job = (extractList, (collected, url))
        taskQueue.put(job)

    taskQueue.join()
    return collected
Exemple #5
0
def findAbitsAsync(contestLinks: List[str],
                   additionalParameters: dict) -> dict:
    """Queue one ``findAbits`` task per contest link and return the shared dict.

    Every task receives the same result dict, one link from ``contestLinks``
    and ``additionalParameters`` unchanged; ``findAbits`` is presumably what
    fills the dict (its body is not visible here). The dict is returned once
    ``taskQueue.join()`` has returned.
    """
    perLink = {}  # {link: [abits]}

    for url in contestLinks:
        taskArgs = (perLink, url, additionalParameters)
        taskQueue.put((findAbits, taskArgs))

    taskQueue.join()
    return perLink
Exemple #6
0
def findDepartmentToSpecialityToFormToEducationalProgramToLink(forms: Dict[str, Dict[str, Dict[str, str]]]) -> \
        Tuple[Dict[str, Dict[str, Dict[str, Dict[str, str]]]], int]:
    """Build the department -> speciality -> form -> educational program -> link mapping.

    For every (department, speciality, form, link) found in ``forms`` one task
    is put on ``taskQueue``. The task loads the page at ``link`` with
    ``makeSoup`` and records every anchor that has an ``href`` and sits inside
    a ``tbody`` of a table carrying the ``ranepa-table`` class: the anchor's
    visible text becomes the program name, ``RANEPA_ROOT + href`` the stored
    link.

    Returns the nested mapping and the number of anchors recorded (counted per
    anchor, so a repeated program name is counted each time). The function
    returns only after ``taskQueue.join()`` does. Anything raised inside a
    task is reported through ``logError`` and not propagated.
    """
    # {department: {speciality: {form: {educationalProgram: link}}}}
    result = {}
    recorded = [0]  # one-element list so the nested task can update it

    def addEducationalProgram(department, speciality, form, link):
        try:
            for table in makeSoup(link).find_all("table"):
                # Only tables marked with the "ranepa-table" class are scanned.
                if not table.has_attr("class"):
                    continue
                if "ranepa-table" not in table["class"]:
                    continue

                for body in table.find_all("tbody"):
                    for anchor in body.find_all("a"):
                        if not anchor.has_attr("href"):
                            continue

                        programName = visibleSoupToString(anchor)
                        target = RANEPA_ROOT + anchor["href"]
                        result[department][speciality][form][
                            programName] = target

                        recorded[0] += 1
                        printDot()
        except BaseException:
            details = traceback.format_exc()
            logError("Ошибка в образовательной программе %s: %s" %
                     (link, details))

    # Create the empty slots up front so tasks only ever fill existing dicts.
    for department, perSpeciality in forms.items():
        departmentSlot = result[department] = {}

        for speciality, perForm in perSpeciality.items():
            specialitySlot = departmentSlot[speciality] = {}

            for form, link in perForm.items():
                specialitySlot[form] = {}
                taskArgs = (department, speciality, form, link)
                taskQueue.put((addEducationalProgram, taskArgs))

    taskQueue.join()

    return result, recorded[0]
Exemple #7
0
def findLinkToAbit(educationalPrograms: Dict[str, Dict[str, Dict[str, Dict[str, str]]]]) -> \
        Tuple[Dict[str, list], int]:
    """Parse every educational-program page into a list of applicant records.

    For each (department, speciality, form, educationalProgram, link) found in
    ``educationalPrograms`` one task is put on ``taskQueue``. The task loads
    the page at ``link`` with ``makeSoup`` and walks its ``section`` elements
    whose ``id`` is ``list_budget`` or ``list_contract``, skipping sections
    whose raw markup contains the empty-list marker. Each ``tr`` of the
    section's ``tbody`` becomes one dict keyed by ``PROPERTY`` members.

    Column positions are taken from the ``th`` captions of the section's
    ``thead``; the subject columns are the ones between the total-score column
    and the individual-achievements column.

    Returns ``({link: [records]}, total number of records)``. A link appears
    in the mapping only if its whole page was parsed without an exception;
    failures are reported through ``logError`` and not propagated. The
    function returns only after ``taskQueue.join()`` does.

    Raises (inside the task, where it is caught and logged):
        ValueError: a listed section lacks one of the required header columns.
    """
    links = dict()  # {link: [abits]}
    abitCount = [0]  # one-element list so the nested task can update it

    # Raw-markup fragment whose presence marks a section with an empty list.
    emptyListMarker = 'style="text-align:center">Список пуст</td>'

    def sectionIsEmpty(section):
        """Tell whether the section's raw markup contains the empty-list marker."""
        return emptyListMarker in soupToRawString(section)

    def parseScore(cell):
        """Return the cell's visible text as a float, or None if float() rejects it."""
        try:
            return float(visibleSoupToString(cell))
        except ValueError:
            return None

    def addAbits(department, speciality, form, educationalProgram, link):
        try:
            abits = list()
            for section in makeSoup(link).find_all("section"):
                if sectionIsEmpty(section):
                    continue

                if not (section.has_attr("id") and section["id"] in (
                        "list_budget", "list_contract")):
                    continue

                # Read the header captions once; they drive both the column
                # lookup and the list of subject names.
                captions = [
                    visibleSoupToString(th)
                    for th in section.find("thead").find_all("th")
                ]

                fioIdx = None
                statusIdx = None
                sumIdx = None
                individualBonusIdx = None
                originalIdx = -1  # the last cell of every row

                for idx, contents in enumerate(captions):
                    if "ФИО" in contents:
                        fioIdx = idx
                    elif "Статус" in contents:
                        statusIdx = idx
                    elif "Сумма конкурсных баллов" in contents:
                        sumIdx = idx
                    elif "Сумма баллов по индивидуальным достижениям" in contents:
                        individualBonusIdx = idx

                # Explicit check instead of `assert`: asserts are removed under
                # `python -O`, which would let an unset index be used silently.
                missing = [
                    caption for caption, idx in (
                        ("ФИО", fioIdx),
                        ("Статус", statusIdx),
                        ("Сумма конкурсных баллов", sumIdx),
                        ("Сумма баллов по индивидуальным достижениям",
                         individualBonusIdx),
                    ) if idx is None
                ]
                if missing:
                    raise ValueError(
                        "table header lacks required column(s): %s" %
                        ", ".join(missing))

                subjectsBeginIdx = sumIdx + 1
                subjects = captions[subjectsBeginIdx:individualBonusIdx]
                forMoney = section["id"] == "list_contract"

                for tr in section.find("tbody").find_all("tr"):
                    abit = dict()

                    abit[PROPERTY.DEPARTMENT] = department
                    abit[PROPERTY.SPECIALITY] = speciality
                    abit[PROPERTY.EDU_PROG] = educationalProgram
                    abit[PROPERTY.ONLY_IN_WALLS] = form
                    abit[PROPERTY.FOR_MONEY] = forMoney

                    tds = tr.find_all("td")

                    abit[PROPERTY.ABIT_NAME] = visibleSoupToString(
                        tds[fioIdx])
                    abit[PROPERTY.CONTEST_TYPE] = visibleSoupToString(
                        tds[statusIdx])
                    abit[PROPERTY.GRADES] = dict()
                    abit[PROPERTY.SUM] = parseScore(tds[sumIdx])
                    for offset, subj in enumerate(subjects):
                        abit[PROPERTY.GRADES][subj] = parseScore(
                            tds[subjectsBeginIdx + offset])
                    # NOTE(review): unlike the scores above, a non-numeric
                    # bonus cell raises here and drops the whole page; kept
                    # as in the original - confirm whether that is intended.
                    abit[PROPERTY.EXTRA_BONUS] = float(
                        visibleSoupToString(tds[individualBonusIdx]))
                    abit[PROPERTY.ORIGINAL] = visibleSoupToString(
                        tds[originalIdx]) == "Оригинал"

                    abits.append(abit)

            abitCount[0] += len(abits)
            links[link] = abits
            printDot()
        except BaseException:
            # NOTE(review): deliberately as broad as the original, presumably
            # so a failing page never escapes the queued task - confirm.
            logError("Ошибка в списке %s: %s" % (link, traceback.format_exc()))

    for department, specialityDict in educationalPrograms.items():
        for speciality, formDict in specialityDict.items():
            for form, educationalProgramDict in formDict.items():
                for educationalProgram, link in educationalProgramDict.items():
                    taskQueue.put((addAbits, (department, speciality, form,
                                              educationalProgram, link)))

    taskQueue.join()

    return links, abitCount[0]