def _get_messages(request, sync_token, direction):
    """Fetch one page of room messages for the user named in the request's messages.

    The last message found in the request's message storage identifies the
    user: its ``.message`` attribute is matched against
    ``Session.matrix_user_name``. The session's stored sync token is then
    advanced and saved before the parsed page is returned.

    Args:
        request: Request object whose message storage is read and re-filled.
        sync_token: Key of the ``get_room_messages`` response whose value
            becomes the session's new ``matrix_sync_token``.
        direction: Passed through unchanged to ``api.get_room_messages``.

    Returns:
        The ``get_room_messages`` response with its ``'chunk'`` entry
        replaced by ``_parse_messages(chunk)``.

    Raises:
        ValueError: If the request's message storage yields no message, so
            there is no user name to look the session up with.
    """
    user_name = None
    for message in get_messages(request):
        # Only the last message survives the loop; earlier ones are just logged.
        user_name = message
        print("MESSAGE : ", message)

    if user_name is None:
        raise ValueError("no message carrying the matrix user name was found")

    print("_get_message username: ", user_name, " : ", direction)
    # Put the message back on the request.
    # NOTE(review): presumably needed because reading the storage consumes its
    # messages, so later requests could not find the user name - confirm.
    messages.add_message(request, messages.INFO, user_name)
    sys.stdout.flush()

    session = Session.objects.get(matrix_user_name=user_name.message)
    api = MatrixHttpApi(session.matrix_server, token=session.matrix_token)
    # Resolve the room id once; it is a remote lookup.
    room_id = api.get_room_id(session.matrix_room_name)
    synced = api.get_room_messages(room_id,
                                   session.matrix_sync_token,
                                   direction,
                                   limit=session.message_count)

    # Persist the new pagination position before post-processing the page.
    session.matrix_sync_token = synced[sync_token]
    session.save()

    synced['chunk'] = _parse_messages(synced['chunk'])
    return synced
# Example #2
0
class CrawlerMatrixClient:
    """Page through a room's messages via ``MatrixHttpApi`` and dump them to a file.

    Dump file layout, one record per line::

        <timestamp>;<sender>;<base64 of the UTF-8 message body>

    followed by the current pagination token on its own line and a final
    ``sus`` line without a trailing newline. The same layout is written
    both when a dump completes and when it is interrupted, so a later run can
    pick up where the file left off.
    """

    # Last line of a dump file; its presence marks the file as restorable.
    _SUSPEND_MARKER = "sus"
    # Number of events requested per ``get_room_messages`` call.
    _CHUNK_SIZE = 100

    def __init__(self, token, server, roomid):
        """Create the API wrapper and fetch the initial pagination token.

        Args:
            token: Access token handed to ``MatrixHttpApi``.
            server: Host name; ``https://`` is prepended to build the base URL.
            roomid: Room identifier passed to ``get_room_messages``.
        """
        self.server = server
        self.roomid = roomid
        self.token = token
        self.APIWrapper = MatrixHttpApi("https://{}".format(self.server), token=self.token)
        # Start paging from the position reported by an initial sync.
        self.next_batch = self.APIWrapper.sync().get("next_batch")
        print("client initialized")

    def getChunk(self):
        """Fetch the next page of events and advance ``self.next_batch``.

        Returns:
            The response's ``"chunk"`` value (``None`` if the key is absent).
        """
        # NOTE(review): "b" presumably means paging backwards in time - confirm
        # against the MatrixHttpApi documentation.
        response = self.APIWrapper.get_room_messages(
            self.roomid, self.next_batch, "b", limit=self._CHUNK_SIZE)
        self.next_batch = response.get("end")
        return response.get("chunk")

    def try_recover_suspend(self, output_file, contents, timestamps, names, message_count):
        """Reload the state stored in a dump file written by this class.

        The file is parsed completely before anything is changed, so a
        malformed file leaves the lists and ``self.next_batch`` untouched.
        On success the records are appended in place to ``timestamps``
        (as strings), ``names`` and ``contents`` (decoded bodies), and
        ``self.next_batch`` is set to the stored token.

        Args:
            output_file: Path of the dump file to read.
            contents: List that receives the restored message bodies.
            timestamps: List that receives the restored timestamps.
            names: List that receives the restored sender names.
            message_count: Number of messages the caller intended to fetch.

        Returns:
            ``message_count - <restored records> + 1`` when state was
            restored, or ``None`` when the file does not end with the
            suspend marker.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
            ValueError: If a record line is malformed or its body is not
                valid base64 / UTF-8.
        """
        with open(output_file, "r", encoding="utf-8") as handle:
            lines = [line.rstrip("\n") for line in handle]

        # Need at least the token line and the marker line.
        if len(lines) < 2 or lines[-1] != self._SUSPEND_MARKER:
            return None

        print("\n\nrestoring from suspend'\n\n")
        restored = []
        for line in lines[:-2]:
            # Timestamp and base64 never contain ";", so split from both ends
            # to tolerate a ";" inside the sender name.
            timestamp, rest = line.split(";", 1)
            name, encoded = rest.rsplit(";", 1)
            restored.append((timestamp, name, base64.b64decode(encoded).decode()))

        # Everything parsed: commit the restored state.
        self.next_batch = lines[-2]
        for timestamp, name, body in restored:
            timestamps.append(timestamp)
            names.append(name)
            contents.append(body)
        print("restored")
        # NOTE(review): the "+ 1" is long-standing behavior whose intent is not
        # evident from this class - confirm before changing it.
        return message_count - len(restored) + 1

    def _write_dump(self, contents, timestamps, names, output_file):
        """Write every collected record plus the resume trailer to ``output_file``."""
        with open(output_file, "w", encoding="utf-8") as handle:
            for timestamp, name, body in zip(timestamps, names, contents):
                # Bodies are base64-encoded so newlines and ";" inside a
                # message cannot break the line-based format.
                encoded = base64.b64encode(str(body).encode()).decode()
                handle.write("{};{};{}\n".format(timestamp, name, encoded))
            handle.write(self.next_batch)
            handle.write("\n")
            handle.write(self._SUSPEND_MARKER)

    def suspend(self, contents, timestamps, names, output_file):
        """Persist the messages collected so far so a later run can resume.

        Args:
            contents: Collected message bodies.
            timestamps: Collected timestamps, parallel to ``contents``.
            names: Collected sender names, parallel to ``contents``.
            output_file: Path of the dump file; it is overwritten.
        """
        print("suspending to output_file")
        self._write_dump(contents, timestamps, names, output_file)

    def dump_message_events(self, message_count, output_file):
        """Collect message bodies from the room and write them to ``output_file``.

        If ``output_file`` already holds a restorable dump, collection resumes
        from it. ``message_count // 100`` pages are fetched; only events whose
        content has a ``"body"`` are kept. On ``KeyboardInterrupt`` the
        messages gathered so far are written via ``suspend``.

        Args:
            message_count: Approximate number of messages to fetch.
            output_file: Path of the dump file to resume from and write to.
        """
        contents = []
        timestamps = []
        names = []

        remaining = None
        try:
            remaining = self.try_recover_suspend(
                output_file, contents, timestamps, names, message_count)
        except FileNotFoundError:
            # No earlier dump: the normal fresh-start case.
            pass
        except (OSError, ValueError) as error:
            # Best effort: an unreadable or corrupt dump must not stop a new run.
            print("could not restore from {}: {}".format(output_file, error))
        if remaining is not None:
            message_count = remaining

        try:
            count = message_count // self._CHUNK_SIZE
            for progress in range(count):
                for element in self.getChunk() or []:
                    body = (element.get("content") or {}).get("body")
                    if body is not None:
                        timestamps.append(element.get("origin_server_ts"))
                        contents.append(body)
                        names.append(element.get("sender"))
                print("haha progress bar go brr {} out of {}".format(progress, count), end='\r')
            self._write_dump(contents, timestamps, names, output_file)
        except KeyboardInterrupt:
            self.suspend(contents, timestamps, names, output_file)