def _(record: Rank, conn: Connection) -> None:
    """Insert a single ``Rank`` record and commit.

    Two statements are stored under ``sql_dict['insert']['rank']``: index 1
    is used when ``record.field_id`` is set, index 0 when it is ``None``.
    The bound parameters come from ``record.to_namedtuple()``.
    """
    cursor = conn.cursor()
    variant = 0 if record.field_id is None else 1
    cursor.execute(sql_dict['insert']['rank'][variant],
                   record.to_namedtuple())
    conn.commit()
def create_table(conn: Connection, sql: str) -> None:
    """Execute one SQL statement on ``conn`` and commit it.

    Best-effort: any database ``Error`` raised while opening the cursor,
    executing ``sql`` or committing is printed instead of propagated.
    """
    try:
        conn.cursor().execute(sql)
        conn.commit()
    except Error as exc:
        print(exc)
def create_table_if_not_exist(conn: Connection) -> None:
    """Create the ``papers``, ``repos`` and ``files`` tables when missing.

    All three statements use ``CREATE TABLE IF NOT EXISTS``, so calling this
    on a database that already has the tables leaves them untouched (in
    particular, existing tables keep whatever constraints they were created
    with).

    Args:
        conn: Open database connection. The statements are executed on a
            cursor obtained from it and then committed.

    Raises:
        AttributeError: If ``conn`` is falsy (e.g. ``None``).
    """
    if not conn:
        raise AttributeError(
            "a database connection is required to create the tables")

    sql_paper = """
    CREATE TABLE IF NOT EXISTS papers (
        title text,
        url text PRIMARY KEY,
        date text,
        authors text,
        tasks text,
        url_pdf text,
        url_abs text,
        arxiv_id text,
        Timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
    );
    """

    # Each boolean flag is validated against its own column. The table
    # constraints are separated by a comma as standard SQL requires.
    sql_repo = """
    CREATE TABLE IF NOT EXISTS repos (
        name text,
        paper_url text NOT NULL,
        url text NOT NULL,
        readme text,
        private BOOLEAN NOT NULL CHECK (private IN (0,1)),
        framework text,
        mentioned_in_paper BOOLEAN NOT NULL
            CHECK (mentioned_in_paper IN (0,1)),
        mentioned_in_github BOOLEAN NOT NULL
            CHECK (mentioned_in_github IN (0,1)),
        stars INTEGER,
        lang text,
        forks INTEGER,
        PRIMARY KEY (name, paper_url),
        FOREIGN KEY (paper_url) REFERENCES papers(url)
    );
    """

    # NOTE(review): repos is keyed on (name, paper_url), so repos(name) on
    # its own is not a unique parent key. With foreign-key enforcement
    # switched on, SQLite would presumably report a foreign key mismatch on
    # writes to files -- confirm whether the reference should be composite.
    sql_files = """
    CREATE TABLE IF NOT EXISTS files (
        path text,
        url text,
        repo_name text,
        name text,
        size integer,
        FOREIGN KEY (repo_name) REFERENCES repos(name),
        PRIMARY KEY (path, repo_name)
    );
    """

    cursor = conn.cursor()
    # papers first, then repos, then files: each table references the
    # previous one.
    for statement in (sql_paper, sql_repo, sql_files):
        cursor.execute(statement)
    conn.commit()
def crawl_task(conn: Connection, cur: Cursor,
               contest: ContestListPage.Contest) -> bool:
    """Fetch the task list of ``contest`` and store it in ``tasks``.

    Returns:
        ``False`` when ``tasks`` already holds at least one row for the
        contest slug (no request is made in that case). ``True`` otherwise,
        both when the task list page is reported as closed and after the
        fetched rows have been inserted and committed.
    """
    slug: str = contest.contest_slug

    # Skip contests whose tasks were stored by an earlier run.
    cur.execute('SELECT COUNT(*) FROM tasks WHERE contest_slug = ?',
                (slug, ))
    stored_rows = cur.fetchone()[0]
    if stored_rows > 0:
        print(' -> There already exists in table')
        return False

    request_result: TaskListPageRequestResult = (
        TaskListPageRequestResult.create_from_request(slug))
    if request_result.is_closed:
        print(' -> Task list: 404')
        return True

    print(f' -> Task size: {len(request_result.task_list_page.tasks)}')
    rows: List[TaskDBInsertData] = request_result.generate_insert_data()
    cur.executemany('INSERT INTO tasks VALUES (?,?,?,?,?,?)', rows)
    conn.commit()
    return True
def _contest_is_registered(cur: Cursor, slug: str) -> bool:
    """Return True when ``contests`` holds exactly one row for ``slug``."""
    cur.execute('SELECT COUNT(*) FROM contests WHERE contest_slug = ?',
                (slug, ))
    return cur.fetchone()[0] == 1


def crawl_contest(conn: Connection, cur: Cursor,
                  contest: ContestListPage.Contest) -> None:
    """Crawl the submission list pages of ``contest`` into the database.

    Crawling resumes one page past the highest ``pagenum`` already stored in
    ``submissions`` for the contest slug (or at page 1 when nothing is
    stored). Each fetched page is inserted and committed. The loop ends on a
    closed page, on the last page, or on a ``sqlite3.Error`` during the
    insert. Afterwards ``crawl_completed`` is set to 1 for the contest.
    """
    slug: str = contest.contest_slug

    # Decide which page number to start from.
    cur.execute('SELECT MAX(pagenum) FROM submissions WHERE contest = ?',
                (slug, ))
    last_stored_page: Optional[int] = cur.fetchone()[0]
    pagenum: int = 1 if last_stored_page is None else last_stored_page + 1

    while True:
        # Fetch the page.
        result: SubmissionListPageRequestResult = (
            SubmissionListPageRequestResult.create_from_request(
                slug, pagenum))

        if result.is_closed:
            print(f' -> Page {result.pagenum}: 404')
            # Register the contest, using the data from the contest-list
            # entry since no submission page is available.
            if not _contest_is_registered(cur, slug):
                ends_at = contest.time + timedelta(
                    minutes=contest.duration_minutes)
                cur.execute(
                    'INSERT INTO contests VALUES (?,?,?,?,?,?)',
                    (slug, contest.contest_name, contest.time_unix,
                     int(ends_at.timestamp()), 1, 1))
                conn.commit()
            break

        page = result.submission_list_page
        submissions = page.submissions
        print(f' -> Page {result.pagenum}: size={len(submissions)}, '
              f'min={submissions[0].time}, max={submissions[-1].time}')

        # Register the contest, using the data parsed from the page.
        if not _contest_is_registered(cur, slug):
            cur.execute(
                'INSERT INTO contests VALUES (?,?,?,?,?,?)',
                (slug, page.contest_title, page.contest_starttime_unix,
                 page.contest_endtime_unix, 0, 0))

        # Store the submissions of this page.
        rows: List[DBInsertData] = result.generate_insert_data()
        try:
            cur.executemany(
                'INSERT INTO submissions VALUES '
                '(?,?,?,?,?,?,?,?,?,?,?,?,?,?)', rows)
        except sqlite3.Error as exc:
            # NOTE(review): leaving the loop here still reaches the
            # crawl_completed update and the commit below -- confirm that
            # marking the contest completed after a failed insert is
            # intended.
            print(exc)
            break
        conn.commit()

        # Stop once the last page has been stored.
        if result.is_last_page:
            break

        pagenum += 1
        # Pause before the next request (presumably to limit request rate).
        time.sleep(3)

    cur.execute(
        'UPDATE contests SET crawl_completed = 1 WHERE contest_slug = ?',
        (slug, ))
    conn.commit()
def commit_close(con: Connection) -> None:
    """Commit the pending transaction on ``con`` and close the connection.

    The connection is closed even when the commit raises, so a failed commit
    no longer leaks an open connection. The commit error still propagates to
    the caller.

    Args:
        con: Open database connection.
    """
    try:
        con.commit()
    finally:
        con.close()
def _(record: Suffixes, conn: Connection) -> None:
    """Insert the rows of a ``Suffixes`` record and commit.

    Runs the statement stored at ``sql_dict['insert']['suffix']`` once per
    item returned by ``record.to_namedtuple_collection()``.
    """
    cursor = conn.cursor()
    statement = sql_dict['insert']['suffix']
    cursor.executemany(statement, record.to_namedtuple_collection())
    conn.commit()
def _(record: GenusTypes, conn: Connection) -> None:
    """Insert the rows of a ``GenusTypes`` record and commit.

    Runs the statement stored at ``sql_dict['insert']['genus_type']`` once
    per item returned by ``record.to_namedtuple_collection()``.
    """
    cursor = conn.cursor()
    statement = sql_dict['insert']['genus_type']
    cursor.executemany(statement, record.to_namedtuple_collection())
    conn.commit()
def _(record: Field, conn: Connection) -> None:
    """Insert a single ``Field`` record and commit.

    Executes the statement stored at ``sql_dict['insert']['field']`` with
    the parameters returned by ``record.to_namedtuple()``.
    """
    cursor = conn.cursor()
    statement = sql_dict['insert']['field']
    cursor.execute(statement, record.to_namedtuple())
    conn.commit()
def create_table(connection: Connection, query: str) -> None:
    """Execute ``query`` directly on ``connection`` and commit.

    Nothing is caught here: an error raised while executing the statement or
    committing propagates to the caller.
    """
    connection.execute(query)
    connection.commit()