Ejemplo n.º 1
0
    def grab_data(self,
                  folder,
                  server,
                  channel,
                  num_comments=1000,
                  isdm=False):
        """Scan a channel day by day, newest first, and store its text messages.

        Starting from today, one search request is issued per calendar day,
        walking backwards until either ``num_comments`` messages have been
        stored or the year drops below 2015.

        :param folder: The folder name (not used by this method; kept so the
            signature stays compatible with callers).
        :param server: The server name.
        :param channel: The channel name.
        :param num_comments: Maximum number of text messages to store
            (default 1000).
        :param isdm: A flag to check whether we're in a DM or not.

        """

        date = datetime.today()

        while date.year >= 2015 and num_comments > 0:
            request = SimpleRequest(self.headers).request
            today = get_day(date.day, date.month, date.year)

            # Step back right away: every exit path of this iteration
            # (including the TypeError handler below) must move on to the
            # previous day, otherwise a bad response for one day would be
            # requested again forever.
            date += timedelta(days=-1)

            if not isdm:
                request.set_header(
                    'referer', 'https://discordapp.com/channels/%s/%s' %
                    (server, channel))
                content = request.grab_page(
                    'https://discordapp.com/api/%s/guilds/%s/messages/search?channel_id=%s&min_id=%s&max_id=%s&%s'
                    % (self.api, server, channel, today['00:00'],
                       today['23:59'], self.query))
            else:
                request.set_header(
                    'referer',
                    'https://discordapp.com/channels/@me/%s' % channel)
                content = request.grab_page(
                    'https://discordapp.com/api/%s/channels/%s/messages/search?min_id=%s&max_id=%s&%s'
                    % (self.api, channel, today['00:00'], today['23:59'],
                       self.query))

            try:
                if content['messages'] is None:
                    continue

                for messages in content['messages']:
                    for message in messages:
                        if self.types['text'] is True and len(message['content']) > 0:
                            self.insert_text(server, channel, message)
                            if num_comments % 100 == 0:
                                print(
                                    f"adding {num_comments}th comment to database"
                                )
                            num_comments -= 1

                            # Honour the limit exactly instead of finishing
                            # the whole day and overshooting it.
                            if num_comments <= 0:
                                return

            except TypeError:
                # The response (or one of its entries) was not subscriptable,
                # e.g. no page could be fetched; skip this day.
                continue
Ejemplo n.º 2
0
    def download(self, url, folder):
        """Download the contents of a URL unless the file already exists.

        The local file name is built from the last two '/'-separated segments
        of the URL, joined with an underscore and passed through
        ``safe_name``.

        :param url: The target URL.
        :param folder: The target folder.
        :raises IndexError: If the URL has fewer than two '/'-separated
            segments.
        """

        request = SimpleRequest(self.headers).request
        request.set_header('user-agent', 'Mozilla/5.0 (X11; Linux x86_64) Chrome/78.0.3904.87 Safari/537.36')

        segments = url.split('/')
        filename = safe_name('%s_%s' % (segments[-2], segments[-1]))

        # Look for the file inside the target folder rather than the current
        # working directory, so existing downloads are actually detected.
        # NOTE(review): assumes stream_file() stores the file as
        # folder/filename -- confirm against SimpleRequest.
        if not path.exists(path.join(folder, filename)):
            request.stream_file(url, folder, filename, self.buffer)
Ejemplo n.º 3
0
    def grab_data(self, folder, server, channel, isdm=False):
        """Scan and grab the attachments.

        Walks every calendar day from today (per ``gmtime``) back to
        1 January 2015, newest first, issuing one search request per day.
        Each returned message is passed to ``check_config_mimetypes`` and,
        when text grabbing is enabled and the message has content, to
        ``insert_text``.

        :param folder: The folder name.
        :param server: The server name.
        :param channel: The channel name.
        :param isdm: A flag to check whether we're in a DM or not.
        """

        # Local import: used to bound each month by its real number of days.
        from calendar import monthrange

        tzdata = gmtime(time())

        try:
            # range() excludes its stop value, so stop at 2014 / 0 in order to
            # include the year 2015, January and the first day of each month.
            for year in range(tzdata.tm_year, 2014, -1):
                is_current_year = year == tzdata.tm_year

                # Never look at months that have not started yet.
                newest_month = tzdata.tm_mon if is_current_year else 12

                for month in range(newest_month, 0, -1):
                    if is_current_year and month == tzdata.tm_mon:
                        # Only the current month of the current year is cut
                        # off at today's date.
                        newest_day = tzdata.tm_mday
                    else:
                        # Avoid nonexistent dates such as 31 February.
                        newest_day = monthrange(year, month)[1]

                    for day in range(newest_day, 0, -1):
                        request = SimpleRequest(self.headers).request
                        today = get_day(day, month, year)

                        if not isdm:
                            request.set_header('referer', 'https://discordapp.com/channels/%s/%s' % (server, channel))
                            content = request.grab_page(
                                'https://discordapp.com/api/%s/guilds/%s/messages/search?channel_id=%s&min_id=%s&max_id=%s&%s' %
                                (self.api, server, channel, today['00:00'], today['23:59'], self.query)
                            )
                        else:
                            request.set_header('referer', 'https://discordapp.com/channels/@me/%s' % channel)
                            content = request.grab_page(
                                'https://discordapp.com/api/%s/channels/%s/messages/search?min_id=%s&max_id=%s&%s' %
                                (self.api, channel, today['00:00'], today['23:59'], self.query)
                            )

                        try:
                            if content['messages'] is None:
                                continue

                            for messages in content['messages']:
                                for message in messages:
                                    self.check_config_mimetypes(message, folder)

                                    if self.types['text'] is True and len(message['content']) > 0:
                                        self.insert_text(server, channel, message)
                        except TypeError:
                            # The response (or one of its entries) was not
                            # subscriptable; skip this day.
                            continue
        except ValueError:
            # Kept from the original: a ValueError raised anywhere during the
            # scan ends it quietly.
            # NOTE(review): the source of this error is not visible here --
            # confirm whether it should be logged instead.
            pass