コード例 #1
0
ファイル: portalcheck.py プロジェクト: pi0/portalcheck
def get_courses():
    """Fetch the portal page and extract the student's course list.

    Sends a GET request to the module-level ``url`` using the module-level
    ``headers``, feeds the response body to an ``HTMLTableParser`` and reads
    fixed positions out of the parsed tables.

    Returns:
        A dict with three keys, or ``None`` if anything goes wrong
        (network error, unexpected page layout, ...):

        * ``'info'``    -- cell ``[0][1]`` of the first parsed table.
        * ``'summary'`` -- row 2 of the third parsed table.
        * ``'courses'`` -- one dict per row of the third parsed table from
          row 5 onward, skipping rows whose column 2 is empty.
    """
    try:
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as-is because the target server may rely on it -- confirm.
        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely on an unresponsive server.
        response = requests.get(url, headers=headers, verify=False)
        parser = HTMLTableParser()
        parser.feed(response.text)

        info = parser.tables[0][0][1]
        course_table = parser.tables[2]
        summary = course_table[2]

        # Rows 0-4 are not course rows (row 2 is the summary); real course
        # entries start at row 5.
        courses = [
            {
                'title': course[1],
                'title2': course[2],
                'code': course[3],
                'v': course[4],
                'grp': course[5],
                'score': course[6],
                'prof': course[8],
            }
            for course in course_table[5:]
            if len(course[2]) > 0  # Non TA!
        ]

        return {
            'info': info,
            'summary': summary,
            'courses': courses,
        }
    except Exception:
        # Best-effort scrape: any failure is reported to the caller as None.
        # Exception (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return None
コード例 #2
0
def crawl(fileName, maxPages=50):
    """Download paginated tables for every stock code and save them as CSV.

    Stock codes are read with ``getInputStockCode(fileName)``. For each code,
    pages ``1..maxPages`` are requested through ``HTMLTableParser.parse_url``
    until an empty DataFrame is returned. The collected pages are
    concatenated, sorted ascending by the ``date`` column and written to
    ``./results/<code>.csv`` without the index column. Codes that yield no
    data produce no file.

    Args:
        fileName: Passed through to ``getInputStockCode``.
        maxPages: Highest page number to request per stock code
            (default 50, the previously hard-coded limit).
    """
    stockCodes = getInputStockCode(fileName)
    hp = HTMLTableParser()
    for code in stockCodes:
        # Collect the pages and concatenate once: DataFrame.append was
        # removed in pandas 2.0 and re-copied the whole frame on every call.
        pageFrames = []
        for page in range(1, maxPages + 1):
            tableDF = hp.parse_url(code, page)
            if tableDF.empty:
                # An empty page is treated as the end of the data.
                break
            pageFrames.append(tableDF)

        if not pageFrames:
            continue

        finalDF = pd.concat(pageFrames)
        finalDF.sort_values(by=['date'], inplace=True, ascending=True)
        finalDF.to_csv("./results/" + ''.join(code) + ".csv", index=False)