def finish_task(
    connection: psycopg2._psycopg.connection, task_id: int, status: str
) -> None:
    """Mark a task as finished via the finish_task() SQL function.

    The transaction is committed so the status change is durable.
    """
    query = """SELECT finish_task(%s, %s);"""
    with connection.cursor() as cur:
        cur.execute(query, (task_id, status))
        connection.commit()
def listen_queue(connection: psycopg2._psycopg.connection, queue: str) -> None:
    """Start listening for notifications on the queue's dedicated channel.

    The channel name is derived from the queue name and composed with
    sql.Identifier so it is safely quoted.
    """
    channel = sql.Identifier(f"queue#{queue}")
    statement = sql.SQL("""LISTEN {queue_name};""").format(queue_name=channel)
    with connection.cursor() as cur:
        cur.execute(statement)
def register_queue(
    connection: psycopg2._psycopg.connection, queue: str
) -> Optional[int]:
    """Register a queue by name, idempotently.

    Returns the freshly created queue id, or None when a queue with this
    name already exists (ON CONFLICT DO NOTHING suppresses the RETURNING
    row in that case).
    """
    insert = """INSERT INTO queues (queue_name) VALUES (%s) ON CONFLICT DO NOTHING RETURNING id """
    with connection.cursor() as cur:
        cur.execute(insert, (queue,))
        inserted = cur.fetchone()
        connection.commit()
        if not inserted:
            return None
        return inserted[0]
def is_report_published(conn: psycopg2._psycopg.connection, subject_date: date, report_type: str) -> bool:
    """Return whether the report for the given date and type is published.

    Thin wrapper around the is_report_published() SQL function; the result
    is the first column of its single row.
    """
    with conn.cursor() as cur:
        cur.execute(r'''SELECT * FROM is_report_published(%s, %s)''',
                    (subject_date, report_type))
        row = cur.fetchone()
    return row[0]
def import_users(self, connection: psycopg2._psycopg.connection):
    """Import users from the legacy auth_user table into Django.

    Creates a User for each legacy row whose username does not already
    exist; existing usernames are left untouched. New users get the
    placeholder password "******".

    Fix: the original left the cursor open forever; psycopg2 cursors are
    context managers, so use `with` to guarantee it is closed.
    """
    with connection.cursor() as cur:
        cur.execute("SELECT username, date_joined FROM auth_user")
        rows = cur.fetchall()
    for username, date_joined in rows:
        if not User.objects.filter(username=username).exists():
            logger.info("Creating user %s", username)
            User.objects.create_user(
                username=username,
                date_joined=date_joined,
                password="******",
            )
def get_tasks(
    connection: psycopg2._psycopg.connection, queue: str
) -> Iterator[TaskRow]:
    """Yield tasks from the given queue until none remain.

    Repeatedly calls the fetch_task() stored function, committing after
    each call so the fetch is durable, and stops as soon as the function
    returns an empty (all-None) row.
    """
    query = """SELECT id, args, targeted_object, task_type FROM fetch_task(%s);"""
    with connection.cursor(cursor_factory=RealDictCursor) as cur:
        while True:
            cur.execute(query, (queue,))
            connection.commit()
            fetched = cur.fetchone()
            # fetch_task always returns exactly one row; when there is no
            # relevant task, every column of that row is None.
            if fetched["id"] is None:
                return
            yield TaskRow(**fetched)
def launch_task(
    connection: psycopg2._psycopg.connection,
    queue: str,
    name: str,
    lock: str,
    kwargs: types.JSONDict,
) -> int:
    """Enqueue a task on the named queue and return its new id.

    Raises exceptions.QueueNotFound when no queue row matches ``queue``
    (the INSERT..SELECT then produces no RETURNING row), in which case
    nothing is committed.
    """
    insert = """INSERT INTO tasks (queue_id, task_type, targeted_object, args) SELECT id, %s, %s, %s FROM queues WHERE queue_name=%s RETURNING id;"""
    with connection.cursor() as cur:
        cur.execute(insert, (name, lock, kwargs, queue))
        created = cur.fetchone()
        if not created:
            raise exceptions.QueueNotFound(queue)
        connection.commit()
        return created[0]
def retrieve_data_then_generate_trend_report_text(
    conn: psycopg2._psycopg.connection,
    daily_qst_thread_id: int,
    date: datetime,
    uuid: str,
    rank_include_rule: IncludeRule,
    rank_page_capacity: int,
    should_compare_with_last_day: bool,
) -> Tuple[str, str, str]:
    """Open a cursor and run a TrendReportTextGenerator over it.

    Returns whatever generate() produces (a 3-tuple of strings); the
    cursor stays open for the whole generation.
    """
    with conn.cursor() as cur:
        generator = TrendReportTextGenerator(
            db=DB(cur=cur),
            daily_qst_thread_id=daily_qst_thread_id,
            date=date,
            uuid=uuid,
            rank_include_rule=rank_include_rule,
            rank_page_capacity=rank_page_capacity,
            should_compare_with_last_day=should_compare_with_last_day,
        )
        return generator.generate()
def migrate_post_table(conn_s3: sqlite3.Connection, conn_pg: psycopg2._psycopg.connection):
    # Copy every row of the SQLite `post` table into PostgreSQL by calling
    # the record_response stored procedure once per post, printing progress
    # every 100 rows.
    with conn_pg.cursor() as cur_pg:
        n = count_rows(conn_s3, 'post')
        for i, [
            id, parent_thread_id, created_at, user_id, content,
            attachment_base, attachment_extension, name, email, title,
            _,  # misc_fields,
        ] in enumerate(conn_s3.execute(r'SELECT * FROM post')):
            cur_pg.execute(
                r'''
                CALL record_response(''' + ', '.join([r'%s'] * 12) + r''')
                ''',
                (
                    id,
                    parent_thread_id,
                    ts2dt(created_at),  # legacy timestamps are stored as raw numbers
                    user_id,
                    content,
                    attachment_base or '',
                    attachment_extension or '',
                    name or '',
                    email or '',
                    title or '',
                    # The pre-migration code accidentally used the parent
                    # thread's misc_fields as the response's misc_fields,
                    # so deliberately record NULL JSON here instead.
                    psycopg2.extras.Json(None),
                    None,
                ))
            if i % 100 == 0:
                print(f"post: {i+1}/{n} {ts2dt(created_at)}")
def migrate_thread_table(conn_s3: sqlite3.Connection, conn_pg: psycopg2._psycopg.connection):
    # Migrate threads in two passes:
    #   1. every historical revision from `thread_old_revision`
    #   2. the current state of each thread from `thread`
    # Both passes call the record_thread stored procedure (13 arguments).
    with conn_pg.cursor() as cur_pg:
        # last = [previous row's thread id, its not_anymore_at_least_after];
        # rows arrive ordered by (id, not_anymore_at_least_after), so a repeat
        # of the same id means the previous row was a newer revision.
        last = [None, None]
        for [
            id, content, name, email, title, created_at, user_id,
            attachment_base, attachment_extension, misc_fields,
            not_anymore_at_least_after,
        ] in conn_s3.execute(r'''
            SELECT
                thread_old_revision.id, thread_old_revision.content,
                thread_old_revision.name, thread_old_revision.email,
                thread_old_revision.title, thread.created_at, thread.user_id,
                thread.attachment_base, thread.attachment_extension,
                thread.misc_fields, not_anymore_at_least_after
            FROM thread_old_revision
            LEFT JOIN thread ON thread_old_revision.id = thread.id
            ORDER BY thread_old_revision.id ASC, not_anymore_at_least_after ASC
        '''):
            updated_at = None
            if last[0] == id:
                # Same thread as the previous row: that row's timestamp bounds
                # when this revision stopped being current.
                updated_at = ts2dt(last[1])
            else:
                updated_at = find_updated_at(conn_s3, id, None)
            misc_fields = remove_field(misc_fields, 'fid')
            cur_pg.execute(
                r'''
                CALL record_thread(''' + ', '.join([r'%s'] * 13) + r''')
                ''',
                (
                    id,
                    QST_BOARD_ID,
                    ts2dt(created_at),
                    user_id,
                    content,
                    attachment_base or '',
                    attachment_extension or '',
                    name or '',
                    email or '',
                    title or '',
                    misc_fields or psycopg2.extras.Json(None),
                    # NOTE(review): presumably the reply count, unknown for old
                    # revisions (the second pass passes current_reply_count
                    # here) — confirm against record_thread's signature.
                    None,
                    updated_at,
                ))
            last = [id, not_anymore_at_least_after]
        # Second pass: the current revision of every thread, plus its
        # disappeared/checked-at bookkeeping from thread_extra.
        for [
            id, created_at, user_id, content, current_reply_count,
            attachment_base, attachment_extension, name, email, title,
            misc_fields, latest_checked_at, is_disappeared,
            current_revision_checked_at,
        ] in conn_s3.execute(r'''
            SELECT thread.*, checked_at AS latest_checked_at, is_disappeared,
                MAX(not_anymore_at_least_after) AS current_revision_checked_at
            FROM thread
            LEFT JOIN thread_extra ON thread.id = thread_extra.id
            LEFT JOIN thread_old_revision ON thread.id = thread_old_revision.id
            GROUP BY thread.id
        '''):
            misc_fields = remove_field(misc_fields, 'fid')
            cur_pg.execute(
                r'''
                CALL record_thread(''' + ', '.join([r'%s'] * 13) + r''')
                ''',
                (
                    id,
                    QST_BOARD_ID,
                    ts2dt(created_at),
                    user_id,
                    content,
                    attachment_base or '',
                    attachment_extension or '',
                    name or '',
                    email or '',
                    title or '',
                    misc_fields or psycopg2.extras.Json(None),
                    current_reply_count,
                    find_updated_at(conn_s3, id, ts2dt(current_revision_checked_at)),
                ))
            cur_pg.execute(
                r'''
                CALL report_is_thread_disappeared(%s, %s, %s)
                ''',
                (id, bool(is_disappeared), ts2dt(latest_checked_at)))
def each_content_on(conn: psycopg2._psycopg.connection, subject_date: date):
    """Yield [thread_id, content] for every post created on subject_date.

    A top-level post has no parent, so its own id doubles as the thread id.
    """
    boundaries = get_range(subject_date)
    with conn.cursor() as cur:
        cur.execute(
            r'SELECT id, parent_thread_id, content FROM post WHERE in_boundaries(created_at, %s::timestamptz, %s::timestamptz)',
            boundaries)
        for post_id, parent_thread_id, content in cur:
            yield [parent_thread_id or post_id, content]
def total_contents(conn: psycopg2._psycopg.connection, subject_date: date):
    """Return the number of posts created on subject_date."""
    boundaries = get_range(subject_date)
    with conn.cursor() as cur:
        cur.execute(
            r'SELECT count(id) FROM post WHERE in_boundaries(created_at, %s::timestamptz, %s::timestamptz)',
            boundaries)
        row = cur.fetchone()
    return row[0]
def update_dc(conn: psycopg2._psycopg.connection, subject_date: date):
    """dc = data count.

    Incrementally updates the word statistics cached in dc.json up to
    subject_date and returns the resulting dict:
      dc['n']     — number of groups (days) processed so far
      dc['count'] — per-word: how many groups the word appeared in
      dc['next']  — per-word: which words followed it, and how often
    """
    dc_file_path = os.path.join(sys.path[0], 'dc.json')
    if not Path(dc_file_path).exists():
        # No cache yet: start from scratch and derive the first date to
        # process from the earliest activity row.
        dc = {}
        # how many groups there are
        dc['n'] = 0
        # grouping by day/thread: how many groups each word appeared in
        dc['count'] = defaultdict(int)
        # which words followed a given word, and how many times
        dc['next'] = NextDict({})
        with conn.cursor() as cur:
            cur: psycopg2._psycopg.cursor = cur
            cur.execute(r'''
                SELECT run_at FROM activity ORDER BY run_at ASC LIMIT 1
            ''')
            first_created_at: datetime = cur.fetchone()[0]
            # Runs before 04:00 are attributed to the previous calendar day
            # (the day boundary used elsewhere appears to be 4 AM —
            # NOTE(review): confirm against get_range/in_boundaries).
            if first_created_at.hour < 4:
                first_created_at = first_created_at.date() - timedelta(days=1)
            else:
                first_created_at = first_created_at.date()
            # the first day is most likely incomplete, so skip it
            start_date = first_created_at + timedelta(days=1)
    else:
        # Resume from the cached state, one day after the last update.
        with open(dc_file_path, 'r') as f:
            dc = json.load(f)
        start_date = date.fromisoformat(
            dc['last_updated_date']) + timedelta(days=1)
        # Re-wrap the plain dicts loaded from JSON in their working types.
        dc['count'] = defaultdict(int, **dc['count'])
        dc['next'] = NextDict(dc['next'])
    print(f"dc start date: {start_date.isoformat()}")
    updated = False
    if subject_date >= start_date:
        # Iterate every day from start_date through subject_date inclusive.
        # https://stackoverflow.com/a/24637447
        for current_date in [start_date + timedelta(days=x) for x in range(0, (subject_date-start_date).days + 1)]:
            updated = True
            total = total_contents(conn, current_date)
            # dc['n'] += total
            dc['n'] += 1
            # seen_words_per_thread = defaultdict(set)
            seen_words_today = set()
            for i, [thread_id, content] in enumerate(each_content_on(conn, current_date)):
                if i % 1000 == 0:
                    print(f'dc {current_date.isoformat()} {i+1}/{total}')
                words = segment_content(content)
                # NOTE(review): this inner loop reuses the name `i`, shadowing
                # the outer post index; harmless because the outer enumerate
                # reassigns `i` each iteration before the progress print, but
                # confusing — consider renaming.
                for i, word in enumerate(words):
                    if word == ' ':
                        continue
                    # seen_words_per_thread[thread_id].add(word)
                    seen_words_today.add(word)
                    # Count word -> next-word transitions, skipping spaces;
                    # '$total' accumulates the row sum for the word.
                    if i < len(words) - 1 and words[i+1] != ' ':
                        dc['next'][word][words[i+1]] += 1
                        dc['next'][word]['$total'] += 1
            # counts = Counter(
            #     reduce(add, map(lambda x: list(x), seen_words_per_thread.values())))
            counts = Counter(list(seen_words_today))
            dc['count'] = Counter(dc['count']) + counts
    dc['last_updated_date'] = subject_date.isoformat()
    if updated:
        # Custom JSON default: NextDict serializes via get_dict(); anything
        # else falls back to its __dict__.
        def i_f___ing_hate_python(obj):
            if isinstance(obj, NextDict):
                return obj.get_dict()
            return obj.__dict__
        with open(dc_file_path, 'w+') as f:
            json.dump(dc, f, sort_keys=True, indent=4,
                      ensure_ascii=False, default=i_f___ing_hate_python)
    # Normalize count back to a Counter for the caller.
    dc['count'] = Counter(**dc['count'])
    return dc
def never_collected(conn: psycopg2._psycopg.connection) -> bool:
    """Return the result of the never_collected() SQL function.

    True means no collection run has been recorded yet.
    """
    with conn.cursor() as cur:
        cur.execute(r'SELECT * FROM never_collected()')
        row = cur.fetchone()
    return row[0]
def get_last_activity_run_at(conn: psycopg2._psycopg.connection, activity_type: str):
    """Return when an activity of the given type last ran.

    Thin wrapper around the get_last_activity_run_at() SQL function.
    """
    with conn.cursor() as cur:
        cur.execute(r'SELECT get_last_activity_run_at(%s)', (activity_type,))
        row = cur.fetchone()
    return row[0]
def import_user_card_changes(self, connection: psycopg2._psycopg.connection,
                             remove_existing: bool):
    """Import user card-count changes from the legacy database.

    When remove_existing is True, all UserCardChange rows belonging to
    users that have changes in the legacy database are deleted first so
    the import does not duplicate them.
    """
    # NOTE(review): this cursor is never closed — consider
    # `with connection.cursor() as cur:`.
    cur = connection.cursor()
    cur.execute("""
        SELECT DISTINCT auth_user.username
        FROM cards_usercardchange user_card_change
        JOIN auth_user ON auth_user.id = user_card_change.owner_id
    """)
    users_with_changes = cur.fetchall()
    if remove_existing and users_with_changes:
        for (username, ) in users_with_changes:
            user = User.objects.get(username=username)
            card_changes = UserCardChange.objects.filter(owner=user)
            if card_changes.exists():
                logger.info(
                    "Clearing %s user card changes from %s",
                    card_changes.count(),
                    username,
                )
                card_changes.delete()
    # One legacy row per (change, printing-language, face) join combination.
    cur.execute("""
        SELECT card.name, user_card_change.difference, user_card_change.date,
               cards_set.code, auth_user.username, cards_language.name,
               cardprinting.scryfall_id, cardprinting.json_id AS face_uuid
        FROM cards_usercardchange user_card_change
        JOIN auth_user ON auth_user.id = user_card_change.owner_id
        JOIN cards_physicalcard physical_card
             ON physical_card.id = user_card_change.physical_card_id
        JOIN cards_cardprintinglanguage_physical_cards printlang_to_physicalcard
             ON printlang_to_physicalcard.physicalcard_Id = physical_card.id
        JOIN cards_cardprintinglanguage printlang
             ON printlang.id = printlang_to_physicalcard.cardprintinglanguage_id
        JOIN cards_language ON cards_language.id = printlang.language_id
        JOIN cards_cardprinting cardprinting
             ON cardprinting.id = printlang.card_printing_id
        JOIN cards_card card ON card.id = cardprinting.card_id
        JOIN cards_set ON cards_set.id = cardprinting.set_id
        ORDER BY user_card_change.id ASC
    """)
    for (
        card_name,
        difference,
        change_date,
        set_code,
        username,
        language_name,
        scryfall_id,
        face_uuid,
    ) in cur.fetchall():
        print(card_name, difference, set_code, change_date)
        user = User.objects.get(username=username)
        card_localisation = self.get_card_localisation(
            scryfall_id=scryfall_id, language_name=language_name)
        existing_change = UserCardChange.objects.filter(
            owner=user,
            card_localisation=card_localisation,
            date=change_date)
        # NOTE(review): presumably multi-faced cards produce one joined row
        # per face, and this skip avoids recording the same change twice —
        # confirm, since a genuine duplicate change on a multi-faced card
        # would also be skipped.
        if (existing_change.exists()
                and card_localisation.localised_faces.count() > 1):
            continue
        UserCardChange.objects.create(
            owner=user,
            card_localisation=card_localisation,
            difference=difference,
            date=change_date,
        )
def import_user_owned_cards(self, connection: psycopg2._psycopg.connection,
                            remove_existing: bool):
    """Import user-owned card counts from the legacy database.

    When remove_existing is True, all UserOwnedCard rows belonging to
    users that have cards in the legacy database are deleted first so
    the import does not duplicate them.
    """
    # NOTE(review): this cursor is never closed — consider
    # `with connection.cursor() as cur:`.
    cur = connection.cursor()
    cur.execute("""
        SELECT DISTINCT(auth_user.username)
        FROM cards_userownedcard user_owned_card
        JOIN auth_user ON auth_user.id = user_owned_card.owner_id
    """)
    users_with_cards = cur.fetchall()
    if remove_existing and users_with_cards:
        for (username, ) in users_with_cards:
            user = User.objects.get(username=username)
            user_cards = UserOwnedCard.objects.filter(owner=user)
            if user_cards.exists():
                logger.info("Clearing %s user cards from %s",
                            user_cards.count(), username)
                user_cards.delete()
    # One legacy row per (owned card, printing-language, face) join combination.
    cur.execute("""
        SELECT card.name, user_owned_card.count, cards_set.code,
               auth_user.username, cards_language.name,
               cardprinting.scryfall_id, cardprinting.json_id AS face_uuid
        FROM cards_userownedcard user_owned_card
        JOIN auth_user ON auth_user.id = user_owned_card.owner_id
        JOIN cards_physicalcard physical_card
             ON physical_card.id = user_owned_card.physical_card_id
        JOIN cards_cardprintinglanguage_physical_cards printlang_to_physicalcard
             ON printlang_to_physicalcard.physicalcard_Id = physical_card.id
        JOIN cards_cardprintinglanguage printlang
             ON printlang.id = printlang_to_physicalcard.cardprintinglanguage_id
        JOIN cards_language ON cards_language.id = printlang.language_id
        JOIN cards_cardprinting cardprinting
             ON cardprinting.id = printlang.card_printing_id
        JOIN cards_card card ON card.id = cardprinting.card_id
        JOIN cards_set ON cards_set.id = cardprinting.set_id
        ORDER BY user_owned_card.id ASC
    """)
    for (
        card_name,
        count,
        set_code,
        username,
        language_name,
        scryfall_id,
        face_uuid,
    ) in cur.fetchall():
        print(card_name, count, set_code)
        user = User.objects.get(username=username)
        localisation = self.get_card_localisation(
            scryfall_id=scryfall_id, language_name=language_name)
        existing_user_card = UserOwnedCard.objects.filter(
            owner=user, card_localisation=localisation)
        if existing_user_card.exists():
            # NOTE(review): `>= 1` skips every already-present localisation
            # (every localisation has at least one face), so the interactive
            # prompt below is effectively unreachable; the sibling
            # import_user_card_changes uses `> 1` — confirm which is intended.
            if localisation.localised_faces.count() >= 1:
                continue
            result = query_yes_no("{} already exists, skip it?".format(
                existing_user_card.first()))
            if not result:
                # NOTE(review): bare Exception — a specific exception type
                # would be clearer for callers.
                raise Exception
        UserOwnedCard.objects.create(owner=user,
                                     card_localisation=localisation,
                                     count=count)