Code example #1
def finish_task(
    connection: psycopg2._psycopg.connection, task_id: int, status: str
) -> None:
    """Record a task's final status via the finish_task() stored function.

    :param connection: open psycopg2 connection
    :param task_id: id of the task to finish
    :param status: final status to record
    """
    query = """SELECT finish_task(%s, %s);"""
    cursor = connection.cursor()
    try:
        cursor.execute(query, (task_id, status))
    finally:
        cursor.close()

    connection.commit()
Code example #2
def listen_queue(connection: psycopg2._psycopg.connection, queue: str) -> None:
    """Start LISTENing on the notification channel for *queue*.

    The channel name is "queue#<queue>", quoted safely as an SQL identifier.
    """
    channel = sql.Identifier(f"queue#{queue}")
    statement = sql.SQL("""LISTEN {queue_name};""").format(queue_name=channel)

    with connection.cursor() as cursor:
        cursor.execute(statement)
Code example #3
def register_queue(
    connection: psycopg2._psycopg.connection, queue: str
) -> Optional[int]:
    """Register *queue* in the queues table if it does not already exist.

    Returns the id of the newly inserted row, or None when the queue was
    already registered (ON CONFLICT DO NOTHING produces no RETURNING row).
    """
    insert = """INSERT INTO queues (queue_name)
               VALUES (%s)
               ON CONFLICT DO NOTHING
               RETURNING id
               """
    with connection.cursor() as cursor:
        cursor.execute(insert, (queue,))
        result = cursor.fetchone()

    connection.commit()

    if result is None:
        return None
    return result[0]
Code example #4
    def is_report_published(conn: psycopg2._psycopg.connection,
                            subject_date: date, report_type: str) -> bool:
        """Tell whether the report for *subject_date* / *report_type* exists.

        Delegates entirely to the is_report_published() database function.
        """
        with conn.cursor() as cur:
            cur.execute(r'''SELECT * FROM is_report_published(%s, %s)''',
                        (subject_date, report_type))
            (published, *_rest) = cur.fetchone()
            return published
Code example #5
 def import_users(self, connection: psycopg2._psycopg.connection):
     """Copy users from the legacy auth_user table into the User model.

     Existing usernames are skipped; newly created users get the literal
     placeholder password "******" (real hashes are not migrated).

     :param connection: open psycopg2 connection to the legacy database
     """
     # Fix: the original never closed the cursor; the context manager
     # guarantees it is closed even if a query raises.
     with connection.cursor() as cur:
         cur.execute("SELECT username, date_joined FROM auth_user")
         rows = cur.fetchall()
     for username, date_joined in rows:
         if not User.objects.filter(username=username).exists():
             logger.info("Creating user %s", username)
             User.objects.create_user(username=username,
                                      date_joined=date_joined,
                                      password="******")
Code example #6
def get_tasks(
    connection: psycopg2._psycopg.connection, queue: str
) -> Iterator[TaskRow]:
    """Yield tasks from *queue* until fetch_task reports none remain.

    Each fetch_task() call is committed immediately, before the row is
    handed to the caller.
    """
    query = """SELECT id, args, targeted_object, task_type FROM fetch_task(%s);"""
    with connection.cursor(cursor_factory=RealDictCursor) as cursor:
        while True:
            cursor.execute(query, (queue,))
            connection.commit()
            row = cursor.fetchone()
            # fetch_task always returns exactly one row; when no task is
            # available, every column of that row is None.
            if row["id"] is None:
                return
            yield TaskRow(**row)
Code example #7
def launch_task(
    connection: psycopg2._psycopg.connection,
    queue: str,
    name: str,
    lock: str,
    kwargs: types.JSONDict,
) -> int:
    """Insert a new task into *queue* and return its id.

    :param connection: open psycopg2 connection
    :param queue: name of a previously registered queue
    :param name: task type identifier
    :param lock: targeted-object lock value
    :param kwargs: JSON-serializable task arguments
    :raises exceptions.QueueNotFound: if *queue* is not registered
    """
    with connection.cursor() as cursor:
        cursor.execute(
            """INSERT INTO tasks (queue_id, task_type, targeted_object, args)
               SELECT id, %s, %s, %s FROM queues WHERE queue_name=%s
               RETURNING id;""",
            (name, lock, kwargs, queue),
        )
        row = cursor.fetchone()

    if not row:
        # Fix: nothing was inserted, so roll back instead of leaving the
        # failed transaction open (which would keep holding locks on this
        # connection after the exception propagates).
        connection.rollback()
        raise exceptions.QueueNotFound(queue)

    connection.commit()
    return row[0]
Code example #8
def retrieve_data_then_generate_trend_report_text(
    conn: psycopg2._psycopg.connection,
    daily_qst_thread_id: int,
    date: datetime,
    uuid: str,
    rank_include_rule: IncludeRule,
    rank_page_capacity: int,
    should_compare_with_last_day: bool,
) -> Tuple[str, str, str]:
    """Open a cursor, build a TrendReportTextGenerator over it, and run it.

    Returns the three-string tuple produced by generate().
    """
    with conn.cursor() as cur:
        generator = TrendReportTextGenerator(
            db=DB(cur=cur),
            daily_qst_thread_id=daily_qst_thread_id,
            date=date,
            rank_include_rule=rank_include_rule,
            rank_page_capacity=rank_page_capacity,
            uuid=uuid,
            should_compare_with_last_day=should_compare_with_last_day,
        )
        return generator.generate()
Code example #9
def migrate_post_table(conn_s3: sqlite3.Connection,
                       conn_pg: psycopg2._psycopg.connection):
    """Replay every row of the SQLite ``post`` table into Postgres.

    Each row goes through the record_response() stored procedure; progress
    is printed every 100 rows.
    """
    call_sql = r'''
                CALL record_response(''' + ', '.join([r'%s'] * 12) + r''')
                '''
    with conn_pg.cursor() as cur_pg:
        n = count_rows(conn_s3, 'post')
        for i, row in enumerate(conn_s3.execute(r'SELECT * FROM post')):
            (post_id, parent_thread_id, created_at, user_id, content,
             attachment_base, attachment_extension, name, email, title,
             _misc_fields) = row
            cur_pg.execute(
                call_sql,
                (
                    post_id,
                    parent_thread_id,
                    ts2dt(created_at),
                    user_id,
                    content,
                    attachment_base or '',
                    attachment_extension or '',
                    name or '',
                    email or '',
                    title or '',
                    # The pre-migration code mistakenly recorded the parent
                    # thread's misc_fields as the response's misc_fields, so
                    # NULL is stored here on purpose.
                    psycopg2.extras.Json(None),
                    None,
                ))
            if i % 100 == 0:
                print(f"post: {i+1}/{n} {ts2dt(created_at)}")
Code example #10
def migrate_thread_table(conn_s3: sqlite3.Connection,
                         conn_pg: psycopg2._psycopg.connection):
    """Migrate threads from the SQLite archive into Postgres.

    Two passes, both replayed through the record_thread stored procedure:
      1. every historical revision (thread_old_revision joined to thread);
      2. the current state of every thread, which also reports the
         disappearance status via report_is_thread_disappeared.
    """

    with conn_pg.cursor() as cur_pg:
        # last = [previous row's thread id, its not_anymore_at_least_after];
        # lets consecutive revisions of the same thread reuse the previous
        # revision's timestamp as their updated_at.
        last = [None, None]
        # Pass 1: historical revisions, ordered so that all revisions of one
        # thread arrive consecutively, oldest first.
        for [
                id,
                content,
                name,
                email,
                title,
                created_at,
                user_id,
                attachment_base,
                attachment_extension,
                misc_fields,
                not_anymore_at_least_after,
        ] in conn_s3.execute(r'''
            SELECT
                thread_old_revision.id, thread_old_revision.content,
                thread_old_revision.name, thread_old_revision.email, thread_old_revision.title,
                thread.created_at, thread.user_id, thread.attachment_base, thread.attachment_extension,
                thread.misc_fields,
                not_anymore_at_least_after
            FROM thread_old_revision
            LEFT JOIN thread ON thread_old_revision.id = thread.id
            ORDER BY thread_old_revision.id ASC, not_anymore_at_least_after ASC
        '''):
            updated_at = None
            if last[0] == id:
                # Same thread as the previous row: the previous revision's
                # timestamp bounds when this revision was superseded.
                updated_at = ts2dt(last[1])
            else:
                # First (oldest) revision of this thread seen in this pass.
                updated_at = find_updated_at(conn_s3, id, None)
            # Strip the 'fid' key from misc_fields; the board id is passed
            # separately as QST_BOARD_ID below.
            misc_fields = remove_field(misc_fields, 'fid')
            cur_pg.execute(
                r'''
                CALL record_thread(''' + ', '.join([r'%s'] * 13) + r''')
                ''', (
                    id,
                    QST_BOARD_ID,
                    ts2dt(created_at),
                    user_id,
                    content,
                    attachment_base or '',
                    attachment_extension or '',
                    name or '',
                    email or '',
                    title or '',
                    misc_fields or psycopg2.extras.Json(None),
                    None,
                    updated_at,
                ))
            last = [id, not_anymore_at_least_after]

        # Pass 2: current state of every thread, joined with per-thread extra
        # data (latest check time, disappearance flag) and the newest
        # revision timestamp aggregated from thread_old_revision.
        for [
                id,
                created_at,
                user_id,
                content,
                current_reply_count,
                attachment_base,
                attachment_extension,
                name,
                email,
                title,
                misc_fields,
                latest_checked_at,
                is_disappeared,
                current_revision_checked_at,
        ] in conn_s3.execute(r'''
                SELECT thread.*,
                    checked_at AS latest_checked_at, is_disappeared,
                    MAX(not_anymore_at_least_after) AS current_revision_checked_at
                FROM thread
                LEFT JOIN thread_extra ON thread.id = thread_extra.id
                LEFT JOIN thread_old_revision ON thread.id = thread_old_revision.id
                GROUP BY thread.id
            '''):
            misc_fields = remove_field(misc_fields, 'fid')
            cur_pg.execute(
                r'''
                CALL record_thread(''' + ', '.join([r'%s'] * 13) + r''')
                ''', (
                    id,
                    QST_BOARD_ID,
                    ts2dt(created_at),
                    user_id,
                    content,
                    attachment_base or '',
                    attachment_extension or '',
                    name or '',
                    email or '',
                    title or '',
                    misc_fields or psycopg2.extras.Json(None),
                    current_reply_count,
                    find_updated_at(conn_s3, id,
                                    ts2dt(current_revision_checked_at)),
                ))
            # Record whether the thread had disappeared as of its latest check.
            cur_pg.execute(
                r'''
                CALL report_is_thread_disappeared(%s, %s, %s)
            ''', (id, bool(is_disappeared), ts2dt(latest_checked_at)))
Code example #11
def each_content_on(conn: psycopg2._psycopg.connection, subject_date: date):
    """Yield [thread id, content] for every post made on *subject_date*.

    For a top-level post (no parent thread) the post's own id serves as
    the thread id.
    """
    query = r'SELECT id, parent_thread_id,  content FROM post WHERE in_boundaries(created_at, %s::timestamptz, %s::timestamptz)'
    with conn.cursor() as cur:
        cur.execute(query, get_range(subject_date))
        for post_id, parent_thread_id, content in cur:
            yield [parent_thread_id or post_id, content]
Code example #12
def total_contents(conn: psycopg2._psycopg.connection, subject_date: date):
    """Return how many posts were created within *subject_date*'s boundaries."""
    query = r'SELECT count(id) FROM post WHERE in_boundaries(created_at, %s::timestamptz, %s::timestamptz)'
    with conn.cursor() as cur:
        cur.execute(query, get_range(subject_date))
        row = cur.fetchone()
    return row[0]
Code example #13
def update_dc(conn: psycopg2._psycopg.connection, subject_date: date):
    """dc = data count.

    Incrementally updates word statistics persisted in ``dc.json`` beside
    the script: ``n`` (number of day-groups processed), ``count`` (per word,
    how many day-groups it appeared in) and ``next`` (per word, how often
    each other word directly follows it).  Returns the updated ``dc``
    mapping with ``count`` converted to a ``Counter``.
    """
    dc_file_path = os.path.join(sys.path[0], 'dc.json')
    if not Path(dc_file_path).exists():
        # No saved state yet: initialize from scratch and derive the start
        # date from the earliest recorded activity.
        dc = {}
        # how many groups have been counted so far
        dc['n'] = 0
        # grouping by day/thread, how many groups each word has appeared in
        dc['count'] = defaultdict(int)
        # for each word, which words follow it and how many times
        dc['next'] = NextDict({})
        with conn.cursor() as cur:
            cur: psycopg2._psycopg.cursor = cur
            cur.execute(r'''
                SELECT run_at FROM activity
                ORDER BY run_at ASC
                LIMIT 1
            ''')
            first_created_at: datetime = cur.fetchone()[0]
            # Runs before 04:00 count toward the previous logical day.
            if first_created_at.hour < 4:
                first_created_at = first_created_at.date() - timedelta(days=1)
            else:
                first_created_at = first_created_at.date()
            # the first day is most likely incomplete, so skip it
            start_date = first_created_at + timedelta(days=1)
    else:
        # Resume from the last persisted state.
        with open(dc_file_path, 'r') as f:
            dc = json.load(f)
            start_date = date.fromisoformat(
                dc['last_updated_date']) + timedelta(days=1)
            dc['count'] = defaultdict(int, **dc['count'])
            dc['next'] = NextDict(dc['next'])

    print(f"dc start date: {start_date.isoformat()}")

    updated = False
    if subject_date >= start_date:
        # Iterate every day from start_date through subject_date inclusive.
        # https://stackoverflow.com/a/24637447
        for current_date in [start_date + timedelta(days=x) for x in range(0, (subject_date-start_date).days + 1)]:
            updated = True
            total = total_contents(conn, current_date)
            # dc['n'] += total
            dc['n'] += 1
            # seen_words_per_thread = defaultdict(set)
            seen_words_today = set()
            for i, [thread_id, content] in enumerate(each_content_on(conn, current_date)):
                if i % 1000 == 0:
                    print(f'dc {current_date.isoformat()} {i+1}/{total}')
                words = segment_content(content)
                # NOTE(review): this inner ``i`` shadows the progress counter
                # above; harmless because the outer ``i`` is re-bound before
                # its only use each iteration, but renaming would be clearer.
                for i, word in enumerate(words):
                    if word == ' ':
                        continue
                    # seen_words_per_thread[thread_id].add(word)
                    seen_words_today.add(word)
                    # Count word -> following-word transitions, skipping the
                    # space token used as a separator.
                    if i < len(words) - 1 and words[i+1] != ' ':
                        dc['next'][word][words[i+1]] += 1
                    dc['next'][word]['$total'] += 1
            # counts = Counter(
            #     reduce(add, map(lambda x: list(x), seen_words_per_thread.values())))
            counts = Counter(list(seen_words_today))
            dc['count'] = Counter(dc['count']) + counts

        dc['last_updated_date'] = subject_date.isoformat()

    if updated:
        # Serializer fallback so NextDict (and any other object) can be
        # dumped to JSON.
        def i_f___ing_hate_python(obj):
            if isinstance(obj, NextDict):
                return obj.get_dict()
            return obj.__dict__
        with open(dc_file_path, 'w+') as f:
            json.dump(dc, f, sort_keys=True, indent=4,
                      ensure_ascii=False, default=i_f___ing_hate_python)

    # Hand ``count`` back to the caller as a plain Counter.
    dc['count'] = Counter(**dc['count'])

    return dc
Code example #14
 def never_collected(conn: psycopg2._psycopg.connection) -> bool:
     """Return the result of the never_collected() database function."""
     with conn.cursor() as cur:
         cur.execute(r'SELECT * FROM never_collected()')
         (result, *_rest) = cur.fetchone()
         return result
Code example #15
 def get_last_activity_run_at(conn: psycopg2._psycopg.connection, activity_type: str):
     """Fetch the last run timestamp for *activity_type* from the
     get_last_activity_run_at() database function.
     """
     with conn.cursor() as cur:
         cur.execute(r'SELECT get_last_activity_run_at(%s)', (activity_type,))
         return cur.fetchone()[0]
Code example #16
    def import_user_card_changes(self,
                                 connection: psycopg2._psycopg.connection,
                                 remove_existing: bool):
        cur = connection.cursor()

        cur.execute("""
SELECT 
DISTINCT auth_user.username  
FROM cards_usercardchange user_card_change
JOIN auth_user
ON auth_user.id = user_card_change.owner_id
""")
        users_with_changes = cur.fetchall()

        if remove_existing and users_with_changes:
            for (username, ) in users_with_changes:
                user = User.objects.get(username=username)
                card_changes = UserCardChange.objects.filter(owner=user)
                if card_changes.exists():
                    logger.info(
                        "Clearing %s user card changes from %s",
                        card_changes.count(),
                        username,
                    )
                    card_changes.delete()

        cur.execute("""
SELECT 
card.name, 
user_card_change.difference, 
user_card_change.date,
cards_set.code, 
auth_user.username, 
cards_language.name, 
cardprinting.scryfall_id, 
cardprinting.json_id AS face_uuid
FROM cards_usercardchange user_card_change
JOIN auth_user
ON auth_user.id = user_card_change.owner_id
JOIN cards_physicalcard physical_card
ON physical_card.id = user_card_change.physical_card_id
JOIN cards_cardprintinglanguage_physical_cards printlang_to_physicalcard
ON printlang_to_physicalcard.physicalcard_Id = physical_card.id
JOIN cards_cardprintinglanguage printlang
ON printlang.id = printlang_to_physicalcard.cardprintinglanguage_id
JOIN cards_language
ON cards_language.id = printlang.language_id
JOIN cards_cardprinting cardprinting
ON cardprinting.id = printlang.card_printing_id
JOIN cards_card card ON card.id = cardprinting.card_id
JOIN cards_set ON cards_set.id = cardprinting.set_id
ORDER BY user_card_change.id ASC
""")
        for (
                card_name,
                difference,
                change_date,
                set_code,
                username,
                language_name,
                scryfall_id,
                face_uuid,
        ) in cur.fetchall():
            print(card_name, difference, set_code, change_date)
            user = User.objects.get(username=username)
            card_localisation = self.get_card_localisation(
                scryfall_id=scryfall_id, language_name=language_name)
            existing_change = UserCardChange.objects.filter(
                owner=user,
                card_localisation=card_localisation,
                date=change_date)
            if (existing_change.exists()
                    and card_localisation.localised_faces.count() > 1):
                continue
            UserCardChange.objects.create(
                owner=user,
                card_localisation=card_localisation,
                difference=difference,
                date=change_date,
            )
Code example #17
    def import_user_owned_cards(self, connection: psycopg2._psycopg.connection,
                                remove_existing: bool):
        cur = connection.cursor()

        cur.execute("""
SELECT 
DISTINCT(auth_user.username) 
FROM cards_userownedcard user_owned_card
JOIN auth_user
ON auth_user.id = user_owned_card.owner_id
""")
        users_with_cards = cur.fetchall()

        if remove_existing and users_with_cards:
            for (username, ) in users_with_cards:
                user = User.objects.get(username=username)
                user_cards = UserOwnedCard.objects.filter(owner=user)
                if user_cards.exists():
                    logger.info("Clearing %s user cards from %s",
                                user_cards.count(), username)
                    user_cards.delete()

        cur.execute("""
SELECT 
card.name, 
user_owned_card.count, 
cards_set.code, 
auth_user.username, 
cards_language.name, 
cardprinting.scryfall_id, 
cardprinting.json_id AS face_uuid
FROM cards_userownedcard user_owned_card
JOIN auth_user
ON auth_user.id = user_owned_card.owner_id
JOIN cards_physicalcard physical_card
ON physical_card.id = user_owned_card.physical_card_id
JOIN cards_cardprintinglanguage_physical_cards printlang_to_physicalcard
ON printlang_to_physicalcard.physicalcard_Id = physical_card.id
JOIN cards_cardprintinglanguage printlang
ON printlang.id = printlang_to_physicalcard.cardprintinglanguage_id
JOIN cards_language
ON cards_language.id = printlang.language_id
JOIN cards_cardprinting cardprinting
ON cardprinting.id = printlang.card_printing_id
JOIN cards_card card ON card.id = cardprinting.card_id
JOIN cards_set ON cards_set.id = cardprinting.set_id
ORDER BY user_owned_card.id ASC
""")
        for (
                card_name,
                count,
                set_code,
                username,
                language_name,
                scryfall_id,
                face_uuid,
        ) in cur.fetchall():
            print(card_name, count, set_code)
            user = User.objects.get(username=username)
            localisation = self.get_card_localisation(
                scryfall_id=scryfall_id, language_name=language_name)
            existing_user_card = UserOwnedCard.objects.filter(
                owner=user, card_localisation=localisation)
            if existing_user_card.exists():
                if localisation.localised_faces.count() >= 1:
                    continue
                result = query_yes_no("{} already exists, skip it?".format(
                    existing_user_card.first()))
                if not result:
                    raise Exception
            UserOwnedCard.objects.create(owner=user,
                                         card_localisation=localisation,
                                         count=count)