コード例 #1
0
ファイル: populate_db.py プロジェクト: yv-official/zulip
def choose_date_sent(num_messages: int, tot_messages: int,
                     threads: int) -> datetime:
    """Pick a spoofed send time for message number ``num_messages``.

    With ``threads != 1`` no spoofing is done and the current time is
    returned.  Otherwise the first 80% of the ``tot_messages`` messages
    are laid out over a 3-day window that begins 5 days ago, and the
    remaining messages are laid out over the 24 hours before now.
    Each message owns one equally sized slot inside its window and
    receives a uniformly random instant within that slot.
    """
    # Spoofing time is not supported with threading.
    if threads != 1:
        return timezone_now()

    seconds_per_day = 24 * 60 * 60
    early_count = int(tot_messages * 0.8)
    late_count = tot_messages - early_count

    if num_messages >= early_count:
        # Last 20% of messages: spread across the most recent 24 hours.
        window_start = timezone_now() - timezone_timedelta(days=1)
        slot_seconds = seconds_per_day / late_count
        slot_index = num_messages - early_count
    else:
        # First 80% of messages: start 5 days back and span 3 days.
        window_start = timezone_now() - timezone_timedelta(days=5)
        slot_seconds = 3 * seconds_per_day / early_count
        slot_index = num_messages

    # Random position inside this message's own slot.
    jitter = random.uniform(slot_seconds * slot_index,
                            slot_seconds * (slot_index + 1))
    return window_start + timezone_timedelta(seconds=jitter)
コード例 #2
0
ファイル: populate_db.py プロジェクト: kagonlineteam/zulip
def choose_date_sent(
    num_messages: int, tot_messages: int, oldest_message_days: int, threads: int
) -> datetime:
    """Return a spoofed send time for message number ``num_messages``.

    When ``threads != 1`` the current time is returned unchanged, because
    spoofing is not supported with threading.

    Otherwise the timestamps are arranged so that:
    (1) some messages are sent in the last 4 hours,
    (2) there are some >24hr gaps between adjacent messages, and
    (3) a decent bulk of messages falls in the last day, so adjacent
        messages with the same date are visible.

    The first 80% of messages start ``oldest_message_days`` days ago and
    cover ``min(oldest_message_days - 2, 1)`` days; the rest cover the
    past 24 hours.  Each message gets a uniformly random instant inside
    its own equally sized slot.

    NOTE(review): ``min(oldest_message_days - 2, 1)`` never exceeds one
    day and is zero or negative when ``oldest_message_days <= 2`` --
    confirm that ``max`` was not intended.
    """
    if threads != 1:
        return timezone_now()

    early_count = int(tot_messages * 0.8)
    late_count = tot_messages - early_count
    in_recent_chunk = num_messages >= early_count

    if in_recent_chunk:
        # Last 20% of messages: distribute them over the last 24 hours.
        base_date = timezone_now() - timezone_timedelta(days=1)
        slot_seconds = 24 * 60 * 60 / late_count
        slot_index = num_messages - early_count
    else:
        base_date = timezone_now() - timezone_timedelta(days=oldest_message_days)
        early_span_days = min(oldest_message_days - 2, 1)
        slot_seconds = early_span_days * 24 * 60 * 60 / early_count
        slot_index = num_messages

    slot_begin = slot_seconds * slot_index
    slot_end = slot_seconds * (slot_index + 1)
    return base_date + timezone_timedelta(seconds=random.uniform(slot_begin, slot_end))
コード例 #3
0
    def test_choose_pub_date_large_tot_messages(self) -> None:
        """
        Regression test: asking for a large number of messages used to give
        every message a pub_date of timezone_now(), instead of spreading the
        pub_dates across the span of several days.
        """
        tot_messages = 1000000
        stride = tot_messages // 100
        sampled_dates = [
            choose_pub_date(index, tot_messages, 1)
            for index in range(1, tot_messages, stride)
        ]

        # Each sampled date must be meaningfully later than the one before it.
        minimum_gap = timezone_timedelta(minutes=5)
        for earlier, later in zip(sampled_dates, sampled_dates[1:]):
            self.assertTrue(later - earlier > minimum_gap)
コード例 #4
0
def send_messages(
    data: Tuple[int, Sequence[Sequence[int]], Mapping[str, Any],
                Callable[[str], Any], int]
) -> int:
    """Generate and send ``tot_messages`` randomized messages.

    ``data`` is one tuple holding, in order:

    * ``tot_messages``: number of messages to send.
    * ``personals_pairs``: pairs used for personal messages.  A pair is
      shuffled in place whenever it is used; afterwards element 0 is
      looked up as the ``type_id`` of a personal ``Recipient`` and
      element 1 as the sender's user profile id.
    * ``options``: mapping from which ``"stickyness"``,
      ``"percent_huddles"``, ``"percent_personals"`` and ``"threads"``
      are read.
    * ``output``: unpacked but not used here.
    * ``random_seed``: passed to ``random.seed`` before anything else.

    Message bodies are read from ``var/test_messages.json``, shuffled and
    cycled.  Returns ``tot_messages``.
    """
    (tot_messages, personals_pairs, options, output, random_seed) = data
    random.seed(random_seed)

    with open("var/test_messages.json", "r") as infile:
        dialog = ujson.load(infile)
    random.shuffle(dialog)
    texts = itertools.cycle(dialog)

    recipient_streams = [
        klass.id for klass in Recipient.objects.filter(type=Recipient.STREAM)
    ]  # type: List[int]
    recipient_huddles = [
        h.id for h in Recipient.objects.filter(type=Recipient.HUDDLE)
    ]  # type: List[int]

    huddle_members = {}  # type: Dict[int, List[int]]
    for h in recipient_huddles:
        huddle_members[h] = [
            s.user_profile.id
            for s in Subscription.objects.filter(recipient_id=h)
        ]

    # Chunk sizes for timestamp spoofing: the first 80% of messages are
    # placed in an older window, the remainder in the last 24 hours.
    # Computing the second chunk by subtraction keeps it >= 1 whenever
    # there is at least one message, so neither division below can be by
    # zero.
    amount_in_first_chunk = int(tot_messages * 0.8)
    amount_in_second_chunk = tot_messages - amount_in_first_chunk

    num_messages = 0
    random_max = 1000000
    recipients = {}  # type: Dict[int, Tuple[int, int, Dict[str, Any]]]
    while num_messages < tot_messages:
        saved_data = {}  # type: Dict[str, Any]
        message = Message()
        message.sending_client = get_client('populate_db')

        message.content = next(texts)

        randkey = random.randint(1, random_max)
        if (num_messages > 0
                and random.randint(1, random_max) * 100. / random_max <
                options["stickyness"]):
            # Use an old recipient
            message_type, recipient_id, saved_data = recipients[num_messages -
                                                                1]
            if message_type == Recipient.PERSONAL:
                personals_pair = saved_data['personals_pair']
                random.shuffle(personals_pair)
            elif message_type == Recipient.STREAM:
                message.subject = saved_data['subject']
                message.recipient = get_recipient_by_id(recipient_id)
            elif message_type == Recipient.HUDDLE:
                message.recipient = get_recipient_by_id(recipient_id)
        elif (randkey <= random_max * options["percent_huddles"] / 100.):
            message_type = Recipient.HUDDLE
            message.recipient = get_recipient_by_id(
                random.choice(recipient_huddles))
        elif (randkey <= random_max *
              (options["percent_huddles"] + options["percent_personals"]) /
              100.):
            message_type = Recipient.PERSONAL
            personals_pair = random.choice(personals_pairs)
            random.shuffle(personals_pair)
        elif (randkey <= random_max * 1.0):
            # randkey never exceeds random_max, so this is the catch-all.
            message_type = Recipient.STREAM
            message.recipient = get_recipient_by_id(
                random.choice(recipient_streams))

        if message_type == Recipient.HUDDLE:
            sender_id = random.choice(huddle_members[message.recipient.id])
            message.sender = get_user_profile_by_id(sender_id)
        elif message_type == Recipient.PERSONAL:
            message.recipient = Recipient.objects.get(
                type=Recipient.PERSONAL, type_id=personals_pair[0])
            message.sender = get_user_profile_by_id(personals_pair[1])
            saved_data['personals_pair'] = personals_pair
        elif message_type == Recipient.STREAM:
            stream = Stream.objects.get(id=message.recipient.type_id)
            # Pick a random subscriber to the stream
            message.sender = random.choice(
                Subscription.objects.filter(
                    recipient=message.recipient)).user_profile
            message.subject = stream.name + str(random.randint(1, 3))
            saved_data['subject'] = message.subject

        # Spoofing time not supported with threading
        if options['threads'] != 1:
            message.pub_date = timezone_now()
        else:
            # Distributes 80% of messages starting from 5 days ago, over a
            # period of 3 days. Then, distributes remaining messages over
            # the past 24 hours.
            #
            # The slot width is a float number of seconds.  Whole minutes
            # with floor division truncate to 0 for large tot_messages,
            # which stamps every message with the same date, and divide
            # by zero when tot_messages is 1.
            if num_messages < amount_in_first_chunk:
                spoofed_date = timezone_now() - timezone_timedelta(days=5)
                interval_size = 3 * 24 * 60 * 60 / amount_in_first_chunk
                slot = num_messages
            else:
                spoofed_date = timezone_now() - timezone_timedelta(days=1)
                interval_size = 24 * 60 * 60 / amount_in_second_chunk
                slot = num_messages - amount_in_first_chunk

            offset_seconds = random.uniform(interval_size * slot,
                                            interval_size * (slot + 1))
            spoofed_date += timezone_timedelta(seconds=offset_seconds)
            message.pub_date = spoofed_date

        # We disable USING_RABBITMQ here, so that deferred work is
        # executed in do_send_messages, rather than being
        # queued.  This is important, because otherwise, if run-dev.py
        # wasn't running when populate_db was run, a developer can end
        # up with queued events that reference objects from a previous
        # life of the database, which naturally throws exceptions.
        settings.USING_RABBITMQ = False
        try:
            do_send_messages([{'message': message}])
        finally:
            # Re-enable even if sending raised, so the setting is never
            # left switched off by a failed send.
            settings.USING_RABBITMQ = True

        recipients[num_messages] = (message_type, message.recipient.id,
                                    saved_data)
        num_messages += 1
    return tot_messages
コード例 #5
0
ファイル: populate_db.py プロジェクト: rishig/zulip
def send_messages(data: Tuple[int, Sequence[Sequence[int]], Mapping[str, Any],
                              Callable[[str], Any], int]) -> int:
    """Generate and send ``tot_messages`` randomized messages.

    ``data`` is one tuple holding, in order:

    * ``tot_messages``: number of messages to send.
    * ``personals_pairs``: pairs used for personal messages.  A pair is
      shuffled in place whenever it is used; afterwards element 0 is
      looked up as the ``type_id`` of a personal ``Recipient`` and
      element 1 as the sender's user profile id.
    * ``options``: mapping from which ``"stickyness"``,
      ``"percent_huddles"``, ``"percent_personals"`` and ``"threads"``
      are read.
    * ``output``: unpacked but not used here.
    * ``random_seed``: passed to ``random.seed`` before anything else.

    Message bodies are read from ``var/test_messages.json``, shuffled and
    cycled.  Returns ``tot_messages``.
    """
    (tot_messages, personals_pairs, options, output, random_seed) = data
    random.seed(random_seed)

    with open("var/test_messages.json", "r") as infile:
        dialog = ujson.load(infile)
    random.shuffle(dialog)
    texts = itertools.cycle(dialog)

    recipient_streams = [klass.id for klass in
                         Recipient.objects.filter(type=Recipient.STREAM)]  # type: List[int]
    recipient_huddles = [h.id for h in Recipient.objects.filter(type=Recipient.HUDDLE)]  # type: List[int]

    huddle_members = {}  # type: Dict[int, List[int]]
    for h in recipient_huddles:
        huddle_members[h] = [s.user_profile.id for s in
                             Subscription.objects.filter(recipient_id=h)]

    # Chunk sizes for timestamp spoofing: the first 80% of messages go in
    # an older window, the remainder in the last 24 hours.  Deriving the
    # second chunk by subtraction keeps it >= 1 whenever there is at least
    # one message, so neither division below can be by zero.
    amount_in_first_chunk = int(tot_messages * 0.8)
    amount_in_second_chunk = tot_messages - amount_in_first_chunk

    num_messages = 0
    random_max = 1000000
    recipients = {}  # type: Dict[int, Tuple[int, int, Dict[str, Any]]]
    while num_messages < tot_messages:
        saved_data = {}  # type: Dict[str, Any]
        message = Message()
        message.sending_client = get_client('populate_db')

        message.content = next(texts)

        randkey = random.randint(1, random_max)
        if (num_messages > 0 and
                random.randint(1, random_max) * 100. / random_max < options["stickyness"]):
            # Use an old recipient
            message_type, recipient_id, saved_data = recipients[num_messages - 1]
            if message_type == Recipient.PERSONAL:
                personals_pair = saved_data['personals_pair']
                random.shuffle(personals_pair)
            elif message_type == Recipient.STREAM:
                message.subject = saved_data['subject']
                message.recipient = get_recipient_by_id(recipient_id)
            elif message_type == Recipient.HUDDLE:
                message.recipient = get_recipient_by_id(recipient_id)
        elif (randkey <= random_max * options["percent_huddles"] / 100.):
            message_type = Recipient.HUDDLE
            message.recipient = get_recipient_by_id(random.choice(recipient_huddles))
        elif (randkey <= random_max * (options["percent_huddles"] + options["percent_personals"]) / 100.):
            message_type = Recipient.PERSONAL
            personals_pair = random.choice(personals_pairs)
            random.shuffle(personals_pair)
        elif (randkey <= random_max * 1.0):
            # randkey never exceeds random_max, so this is the catch-all.
            message_type = Recipient.STREAM
            message.recipient = get_recipient_by_id(random.choice(recipient_streams))

        if message_type == Recipient.HUDDLE:
            sender_id = random.choice(huddle_members[message.recipient.id])
            message.sender = get_user_profile_by_id(sender_id)
        elif message_type == Recipient.PERSONAL:
            message.recipient = Recipient.objects.get(type=Recipient.PERSONAL,
                                                      type_id=personals_pair[0])
            message.sender = get_user_profile_by_id(personals_pair[1])
            saved_data['personals_pair'] = personals_pair
        elif message_type == Recipient.STREAM:
            stream = Stream.objects.get(id=message.recipient.type_id)
            # Pick a random subscriber to the stream
            message.sender = random.choice(Subscription.objects.filter(
                recipient=message.recipient)).user_profile
            message.subject = stream.name + str(random.randint(1, 3))
            saved_data['subject'] = message.subject

        # Spoofing time not supported with threading
        if options['threads'] != 1:
            message.pub_date = timezone_now()
        else:
            # Distributes 80% of messages starting from 5 days ago, over a period
            # of 3 days. Then, distributes remaining messages over past 24 hours.
            #
            # The slot width is a float number of seconds.  Whole minutes with
            # floor division truncate to 0 for large tot_messages, which stamps
            # every message with the same date, and divide by zero when
            # tot_messages is 1.
            if num_messages < amount_in_first_chunk:
                spoofed_date = timezone_now() - timezone_timedelta(days=5)
                interval_size = 3 * 24 * 60 * 60 / amount_in_first_chunk
                slot = num_messages
            else:
                spoofed_date = timezone_now() - timezone_timedelta(days=1)
                interval_size = 24 * 60 * 60 / amount_in_second_chunk
                slot = num_messages - amount_in_first_chunk

            offset_seconds = random.uniform(interval_size * slot, interval_size * (slot + 1))
            spoofed_date += timezone_timedelta(seconds=offset_seconds)
            message.pub_date = spoofed_date

        # We disable USING_RABBITMQ here, so that deferred work is
        # executed in do_send_messages, rather than being
        # queued.  This is important, because otherwise, if run-dev.py
        # wasn't running when populate_db was run, a developer can end
        # up with queued events that reference objects from a previous
        # life of the database, which naturally throws exceptions.
        settings.USING_RABBITMQ = False
        try:
            do_send_messages([{'message': message}])
        finally:
            # Re-enable even if sending raised, so the setting is never
            # left switched off by a failed send.
            settings.USING_RABBITMQ = True

        recipients[num_messages] = (message_type, message.recipient.id, saved_data)
        num_messages += 1
    return tot_messages