def download_pictures(user, password, thread_id, output_folder, msg_limit=100):
    """Download image and video attachments from a thread into a folder.

    Logs in through ``Client``, requests up to ``msg_limit`` messages and
    writes every image or video attachment to ``output_folder`` as
    ``<attachment uid>.<extension>``.

    Args:
        user: Login passed to ``Client``.
        password: Password passed to ``Client``.
        thread_id: Thread to read.  When ``None``, the first entry returned
            by ``client.fetchThreadList()`` is used instead.
        output_folder: Destination directory; created when it is missing.
        msg_limit: Maximum number of messages requested from the thread.
    """
    if not isdir(output_folder):
        mkdir(output_folder)

    client = Client(user, password)
    if thread_id is None:
        thread = client.fetchThreadList()[0]
        msg_list = client.fetchThreadMessages(thread.uid, msg_limit)
    else:
        msg_list = client.fetchThreadMessages(thread_id=thread_id, limit=msg_limit)

    # Attachments are selected by the name of their class.
    wanted_kinds = ("ImageAttachment", "VideoAttachment")

    for msg in msg_list:
        for att in msg.attachments:
            if att.__class__.__name__ not in wanted_kinds:
                continue

            url = client.fetchImageUrl(att.uid)
            try:
                ext = att.original_extension
            except AttributeError:
                # The attachment carries no extension: take the 3-4 character
                # suffix that precedes the query string of the URL.  A raw
                # string keeps "\." and "\?" as regex escapes rather than
                # (invalid) string escapes.
                found = re.match(r".+\.([a-z0-9]{3,4})\?.+", url)
                ext = "unkown" if found is None else found.group(1)

            output_filename = join(output_folder, att.uid + "." + ext)
            # Announce the file before the transfer starts.
            print("Downloading %s" % output_filename)

            response = http.request("GET", url=url, preload_content=False)
            data = response.read()
            with open(output_filename, "wb") as f:
                f.write(data)
def getImageURLs(client: "Client", name='Testo Accounto', limit=10):
    """Print attachment image URLs and message texts from a user's thread.

    Searches for users matching ``name``, takes the first result and fetches
    ``limit`` messages from the thread with that user.  The messages are
    walked in the reverse of the order returned by the client; for each one
    the image URL of every attachment is printed, followed by the message
    text.

    Args:
        client: Logged-in client used for the search and the fetches.
        name: Display name to search for.
        limit: Number of messages requested from the thread.

    Returns:
        None.  Everything is written to standard output.
    """
    users = client.searchForUsers(name)
    if not users:
        # Nothing to index into: report it instead of failing on users[0].
        print('No user found for {!r}'.format(name))
        return
    user = users[0]

    messages = client.fetchThreadMessages(thread_id=user.uid, limit=limit)
    for message in reversed(messages):
        for attach in message.attachments:
            print(client.fetchImageUrl(attach.uid))
        print(message.text)
def main():
    """Download every image of a conversation into a directory named after it.

    Reads ``facebook_username``, ``facebook_password`` and ``conversation_id``
    from ``sys.argv``, logs in, creates a directory called ``conversation_id``
    and pages through the conversation, passing the image URL of each
    attachment to ``downloadImage``.

    Returns:
        ``False`` when fewer than three arguments were supplied, otherwise
        ``None``.
    """
    if len(sys.argv) < 4:
        print('Usage: python messenger_scraper.py facebook_username facebook_password conversation_id')
        return False

    facebook_username = sys.argv[1]
    facebook_password = sys.argv[2]
    conversation_id = sys.argv[3]
    page_size = 100

    client = Client(facebook_username, facebook_password)
    print('Attempting {}...'.format(conversation_id))

    try:
        os.mkdir(conversation_id)
    except OSError:
        # Only an already existing directory is harmless; anything else
        # (permissions, bad path, ...) must not be reported as "exists".
        if not os.path.isdir(conversation_id):
            raise
        print('Dir already exists!')

    # The messages are paged through by hand: each request passes the
    # timestamp of the last message of the previous page.
    last_message_timestamp = None
    try:
        while True:
            messages = client.fetchThreadMessages(
                conversation_id, page_size, last_message_timestamp)
            if not messages:
                # An empty page has no last message to read a timestamp from.
                break
            last_message_timestamp = messages[-1].timestamp

            for message in messages:
                for attachment in message.attachments or ():
                    try:
                        url = client.fetchImageUrl(attachment.uid)
                    except Exception:
                        # Best effort: skip attachments without an image URL.
                        # KeyboardInterrupt is deliberately not caught here so
                        # that Ctrl-C reaches the handler below.
                        continue
                    downloadImage(message.author, url, conversation_id, client)

            # A short page means the end of the conversation was reached.
            if len(messages) < page_size:
                break
    except KeyboardInterrupt:
        print('Downloading interrupted!')
def download_pictures(user, password, thread_id, output_folder):
    """Download the images of a thread and report the count by message.

    Logs in through ``Client`` and iterates ``client.fetchThreadImages``.
    Entries that are not ``ImageAttachment`` instances, and images whose
    original extension is ``gif``, are skipped.  Every other image is saved
    to ``output_folder`` as ``<uid>.jpg`` unless ``isFileExist`` reports it
    as already present.  Afterwards a summary message with the number of
    downloaded files is sent to the logged-in account and to ``thread_id``,
    and the client is logged out.

    Args:
        user: Login passed to ``Client``.
        password: Password passed to ``Client``.
        thread_id: Thread whose images are downloaded; also receives the
            summary message.
        output_folder: Destination directory; created when it is missing.
    """
    downloaded = 0
    if not isdir(output_folder):
        mkdir(output_folder)

    client = Client(user, password)
    try:
        for im in client.fetchThreadImages(thread_id):
            if not isinstance(im, ImageAttachment):
                continue
            if im.original_extension == 'gif':
                continue

            img_filename = im.uid + ".jpg"
            output_filename = join(output_folder, img_filename)
            # NOTE(review): the bare file name, not output_filename, is what
            # gets checked here - confirm against the definition of
            # isFileExist that this is the intended lookup.
            if isFileExist(img_filename):
                print("File %s already downloaded" % img_filename)
                continue

            url = client.fetchImageUrl(im.uid)
            response = http.request("GET", url=url, preload_content=False)
            data = response.read()
            print("Downloading %s" % output_filename)
            with open(output_filename, "wb") as f:
                f.write(data)
            # Count the file only once it has been written.
            downloaded += 1

        summary = "Synchronisation du cloud fini, nb photos telecharges: " + str(downloaded)
        # Same report to the logged-in account, then to the thread itself.
        for target in (client.uid, thread_id):
            client.send(
                Message(text=summary),
                thread_id=target,
                thread_type=ThreadType.USER)
    finally:
        # Always end the session, even when a download or a send failed.
        client.logout()
if len(message.attachments) > 0: if (sender_id is None) or (sender_id == message.author): for attachment in message.attachments: if isinstance(attachment, ImageAttachment): try: attachment_ext = str.lower( attachment.original_extension) if attachment_ext not in extension_blacklist: full_images.append({ 'extension': attachment_ext, 'timestamp': message_datetime, 'full_url': fb_client.fetchImageUrl(attachment.uid) }) print('+', sep=' ', end='', flush=True) else: skip_count += 1 print('-', sep=' ', end='', flush=True) total_count += 1 except FBchatException: pass # ignore errors last_message_date = message_datetime # Download Full Images if len(full_images) > 0: images_count = len(full_images)
print("Message from other user/users") dir_sub = "from_others" if len( message.attachments ) > 1 and args.onedir == False: #if enabled, create directory for more attachments in one message dir = create_dir(dir_sub + '/' + msg_id) else: dir = "attachments/" + dir_sub #-----create and chooose correct directory----- #-----detect attachment type----- if isinstance(current, ImageAttachment): print("It's image") extension = current.original_extension url = client.fetchImageUrl(current.uid) if isinstance(current, AudioAttachment): print("It's audio") filename = current.filename extension = filename.split('.')[-1] url = current.url if isinstance(current, VideoAttachment): print("It's video") url = current.preview_url if isinstance(current, FileAttachment): print("It's a file") url = current.url print(url)
for message in messages: message_datetime = convert_epoch_to_datetime(message.timestamp) if len(message.attachments) > 0: if (sender_id is None) or (sender_id == message.author): for attachment in message.attachments: if isinstance(attachment, ImageAttachment): try: attachment_ext = str.lower(attachment.original_extension) if attachment_ext not in extension_blacklist: full_images.append({ 'extension': attachment_ext, 'timestamp': message_datetime, 'full_url': fb_client.fetchImageUrl(attachment.uid) }) print('+', sep=' ', end='', flush=True) else: skip_count += 1 print('-', sep=' ', end='', flush=True) total_count += 1 except FBchatException: pass # ignore errors last_message_date = message_datetime # Download Full Images if len(full_images) > 0: images_count = len(full_images)