def timeline(self):
    self.info['username'] = self.username
    try:
        for post in get_posts(self.username, pages=50):
            if post['time'] and post['post_url']:
                # Index post URLs by their timestamp.
                self.info['timestamps'][post['time'].strftime(
                    "%Y-%m-%d %H:%M:%S")] = post['post_url']
            try:
                self.info['words'].append(post['text'][:50])
                if post['post_url']:
                    self.info['likes'][post['likes']] = post['post_url']
                    self.info['comments'][post['comments']] = post['post_url']
            except (KeyError, TypeError):
                # Skip posts with missing or malformed fields.
                pass
    except Exception:
        # Scraping failed; fall through and report whatever was collected.
        pass
    if not self.info['words']:
        return False
    return self.info

def get_data_item(params):
    data = []
    if params['type'] == 'fp':
        res = get_posts(params['target'], pages=5)
    else:
        res = get_posts(group=params['target'], pages=5)
    for d in res:
        if d['image'] is not None:
            data.append({'id': d['post_id'], 'url': d['image']})
    return data

async def steam(ctx):
    await ctx.message.add_reaction('🎮')
    channel = client.get_channel(769224265697329164)
    with open("steampost.txt", "r") as reader:
        post_list = reader.read().split(" ")
    try:
        with open("steampost.txt", "a") as writer:
            for post in get_posts('comusteambrasil', pages=2):
                # Only announce posts that have not been sent before.
                if str(post['post_id']) not in post_list:
                    embed = discord.Embed(title=str(post['time']),
                                          description=post['text'],
                                          color=0xff5555)
                    message = await channel.send(embed=embed)
                    await message.add_reaction('🥵')
                    await message.add_reaction('🥶')
                    writer.write(f" {post['post_id']}")
    except Exception as err:
        print(err)

def test_get_posts(self):
    expected_post = {
        'comments': 73,
        'image': 'https://scontent.faqp2-3.fna.fbcdn.net/v/t1.0-9/fr/cp0/e15/q65/96724875_3065146506903115_4237164853036318720_o.jpg?_nc_cat=103&_nc_sid=8024bb&_nc_oc=AQmzJTxqWcBz-Q2u7AX_Aj_6bwv6V86hZS-v9BY-3w0h7jy9_LGi-LXss6UJuQn9xhk&_nc_ht=scontent.faqp2-3.fna&_nc_tp=14&oh=a057d46d536592575cce1605eac62dc4&oe=5EE011FB',
        'images': [
            'https://scontent.faqp2-3.fna.fbcdn.net/v/t1.0-9/fr/cp0/e15/q65/96724875_3065146506903115_4237164853036318720_o.jpg?_nc_cat=103&_nc_sid=8024bb&_nc_oc=AQmzJTxqWcBz-Q2u7AX_Aj_6bwv6V86hZS-v9BY-3w0h7jy9_LGi-LXss6UJuQn9xhk&_nc_ht=scontent.faqp2-3.fna&_nc_tp=14&oh=a057d46d536592575cce1605eac62dc4&oe=5EE011FB',
            'https://scontent.faqp2-2.fna.fbcdn.net/v/t1.0-9/fr/cp0/e15/q65/96657922_3065146630236436_9052202957155598336_o.jpg?_nc_cat=101&_nc_sid=8024bb&_nc_ohc=HJe4yM4ZM-IAX_A4Gbb&_nc_ht=scontent.faqp2-2.fna&tp=14&oh=0f88fe17a844510b3ca40ecd53392657&oe=5FA220AD',
            'https://scontent.faqp2-3.fna.fbcdn.net/v/t1.0-9/fr/cp0/e15/q65/96557798_3065146790236420_838564679184809984_o.jpg?_nc_cat=103&_nc_sid=8024bb&_nc_ohc=ZAWOX3v_GjwAX_nMJvh&_nc_ht=scontent.faqp2-3.fna&tp=14&oh=0351cb4b748dd6ce296dd02341f3f949&oe=5FA16534',
            'https://scontent.faqp2-3.fna.fbcdn.net/v/t1.0-9/fr/cp0/e15/q65/96688092_3065146896903076_7861539131082407936_o.jpg?_nc_cat=108&_nc_sid=8024bb&_nc_ohc=G3b4bTeYIoEAX8IYjU4&_nc_ht=scontent.faqp2-3.fna&tp=14&oh=ae53e61554bfe97b85fe3dff884a4a2f&oe=5FA1DB01',
        ],
        'video': None,
        'video_thumbnail': None,
        'likes': 1334,
        'link': 'https://www.nintendo.com/wallpapers/',
        'post_id': '3065154550235644',
        'post_text': 'Check out these themed wallpapers and many more at the link '
                     'below for your personal use! We hope you enjoy them!\n\n'
                     'https://www.nintendo.com/wallpapers/',
        'post_url': 'https://facebook.com/Nintendo/posts/3065154550235644',
        'shared_text': '',
        'shares': 0,
        'text': 'Check out these themed wallpapers and many more at the link below '
                'for your personal use! We hope you enjoy them!\n\n'
                'https://www.nintendo.com/wallpapers/',
        'time': datetime.datetime(2020, 5, 12, 20, 1, 18),
        'user_id': '119240841493711',
        'username': '******',
        'video_id': None,
        'is_live': False,
    }
    post = next(get_posts(account='Nintendo'))
    assert post == expected_post

def test_get_posts(self):
    expected_post = {
        'comments': 73,
        'image': 'https://scontent.faqp2-3.fna.fbcdn.net/v/t1.0-9/fr/cp0/e15/q65/96724875_3065146506903115_4237164853036318720_o.jpg?_nc_cat=103&_nc_sid=8024bb&_nc_oc=AQmzJTxqWcBz-Q2u7AX_Aj_6bwv6V86hZS-v9BY-3w0h7jy9_LGi-LXss6UJuQn9xhk&_nc_ht=scontent.faqp2-3.fna&_nc_tp=14&oh=a057d46d536592575cce1605eac62dc4&oe=5EE011FB',
        'video': None,
        'likes': 1334,
        'link': 'https://www.nintendo.com/wallpapers/',
        'post_id': '3065154550235644',
        'post_text': 'Check out these themed wallpapers and many more at the link '
                     'below for your personal use! We hope you enjoy them!\n'
                     'https://www.nintendo.com/wallpapers/',
        'post_url': 'https://m.facebook.com/story.php?story_fbid=3065154550235644&id=119240841493711',
        'shared_text': '',
        'shares': 0,
        'text': 'Check out these themed wallpapers and many more at the link below '
                'for your personal use! We hope you enjoy them!\n'
                'https://www.nintendo.com/wallpapers/',
        'time': datetime.datetime(2020, 5, 12, 20, 1, 18),
    }
    post = next(get_posts(account='Nintendo'))
    assert post == expected_post

def check():
    with open('pages.csv', mode='r+') as csv_file, \
            NamedTemporaryFile(mode='w', delete=False) as tempfile:
        csv_reader = csv.DictReader(csv_file)
        csv_writer = csv.DictWriter(tempfile, fields)
        csv_writer.writeheader()
        for page in csv_reader:
            temp = None
            for post in get_posts(page['page_tag'], pages=1):
                last_used = datetime.strptime(page['last_post_used'], date_format)
                if post['time'] <= last_used:
                    # Post already sent to the channel.
                    break
                if post['image'] is not None:
                    bot.send_photo(chat_id, post['image'],
                                   (post['text'] if post['text'] else '') +
                                   '\n[' + page['page_name'] + ']')
                    if post['time'] > last_used and temp is None:
                        temp = post['time']
                elif post['text'] is not None:
                    bot.send_message(chat_id,
                                     (post['text'] if post['text'] else '') +
                                     '\n[' + page['page_name'] + ']')
                    if post['time'] > last_used and temp is None:
                        temp = post['time']
            if temp is not None:
                page['last_post_used'] = temp
            row = {'page_name': page['page_name'],
                   'page_tag': page['page_tag'],
                   'last_post_used': page['last_post_used']}
            csv_writer.writerow(row)

def scrape_accounts(accounts):
    posts = []
    for account in accounts:
        for post in get_posts(account, pages=5):
            post['account'] = account
            posts.append(post)
    return posts

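# A minimal usage sketch (not from the original source): driving
# scrape_accounts from a script and persisting the result. The account names
# and output filename are hypothetical; json.dump needs default=str because
# each post's 'time' field is a datetime object.
if __name__ == '__main__':
    import json

    all_posts = scrape_accounts(['Nintendo', 'Microsoft'])
    with open('scraped_posts.json', 'w', encoding='utf-8') as out:
        json.dump(all_posts, out, default=str, ensure_ascii=False)
    print(f'Scraped {len(all_posts)} posts')
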
def command_add(update, context, *args):
    user = User.objects.get(chat_id=args[0])
    user_pages = eval(user.pages)  # pages are stored as a stringified list
    if len(args) == 7:
        page = args[-1].lower()
    else:
        try:
            page = update.message.text[5:].lower()
            if page in user_pages:
                update.message.reply_text(f"Page '{page}' Already Exists")
                return
            # Probe the page to confirm it exists.
            list(get_posts(page, pages=1))
        except Exception:
            update.message.reply_text("Page Does Not Exist")
            return
    if page not in user_pages:
        user_pages.append(page)
        user.pages = user_pages
        user.save()
    else:
        update.message.reply_text(f"Page '{page}' Already Exists")
        return
    try:
        db_page = Page.objects.get(name=page)
        subscribers = eval(db_page.subscribers)
        subscribers.append(args[0])
        # Write the updated list back, otherwise save() persists nothing.
        db_page.subscribers = subscribers
        db_page.save()
    except Page.DoesNotExist:
        Page.objects.create(name=page, last_update=0, subscribers=[args[0]])
    update.message.reply_text(
        f"Page {page} Added Successfully!\n\nSee Your Pages With /list")

def get_data_from_group(self, keyword):
    for group in self.groups:
        for post in get_posts(group=group):
            if post["text"] and keyword in post["text"]:
                self.data = self.data + " " + post["text"]
    return self.data

def extract_page_public_posts(page_name, count):
    max_total_page = int(count / 2)
    file_path = page_name + '_' + str(count) + '.json'
    print('Saving to ' + file_path + '...')
    data = []
    _count = 0
    try:
        for post in get_posts(page_name, pages=max_total_page,
                              extra_info=True, timeout=15):
            print(post)
            if not post['fail']:
                del post['fail']
                data.append(post)
                _count += 1
                print(_count)
                print('\n')
                if _count >= count:
                    break
    except Exception as e:
        print(e)
    finally:
        with open(file_path, "w", encoding='utf-8') as jsonfile:
            json.dump(data, jsonfile, ensure_ascii=False)

def get_facebook_post(nombre_pagina, numero_paginas, id_campana, id_escucha, id_red):
    data_escucha = escucha.objects.get(id=id_escucha)
    data_campana = campana_publicitaria.objects.get(id=id_campana)
    data_red_social = red_social.objects.get(id=id_red)
    for publicacion in get_posts(nombre_pagina, pages=numero_paginas):
        if publicacion and publicacion["post_id"]:
            # Skip posts already stored for this listening/campaign pair.
            data_red_escucha = data_red.objects.filter(
                publicacion_id=publicacion["post_id"],
                data_red_escucha=data_escucha,
                data_red_campana=data_campana).values()
            if not data_red_escucha:
                publicacion_texto = publicacion["post_text"] or ""
                new_publication = data_red(
                    publicacion_id=publicacion["post_id"],
                    publicacion_fecha=publicacion["time"],
                    publicacion_texto=publicacion_texto,
                    publicacion_likes=publicacion["likes"],
                    publicacion_comentarios=publicacion["comments"],
                    publicacion_compartidos=publicacion["shares"],
                    publicacion_user=publicacion["user_id"],
                    data_red_escucha=data_escucha,
                    data_red_campana=data_campana,
                    data_red_social=data_red_social)
                new_publication.save()
    logger.info('Task get facebook post completed successfully')

def scraperpage(page, n, path=path):
    path = path + str(page) + ".csv"
    posts = get_posts(page, pages=n)
    dataframe = pd.DataFrame(posts)
    dataframe.to_csv(path, index=False)
    return path

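# A minimal usage sketch (assumption, not from the original source): calling
# scraperpage and reloading the CSV it writes. The page name and base path are
# hypothetical placeholders, and the './data/' directory must already exist.
import pandas as pd

csv_path = scraperpage('Nintendo', n=2, path='./data/')
df = pd.read_csv(csv_path)
print(df[['time', 'text']].head())  # assumes get_posts returned these columns
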
def routiney():
    for post in get_posts('officialroutineofnepalbanda', pages=1):
        time = str(post['time'])
        with open("r.txt", "r") as f:
            prev = f.read()
        # Skip posts whose timestamp has already been relayed.
        if re.search(time, prev) is None:
            if post['image'] is not None:
                data = {
                    'username': '******',
                    'avatar_url': 'https://pbs.twimg.com/profile_images/777188003445739521/t5GNGfAc_400x400.jpg',
                    'content': str(post['text']) + '\n' + post['image']
                }
            else:
                data = {
                    'username': '******',
                    'avatar_url': 'https://pbs.twimg.com/profile_images/777188003445739521/t5GNGfAc_400x400.jpg',
                    'content': str(post['text'])
                }
            response = requests.post(
                'https://discord.com/api/webhooks/XXX/XXX-XXX', data=data)
            # Remember this post's timestamp so it is not relayed again.
            with open("r.txt", "a") as f:
                f.write(str(post['time']))
    ok()

def scrap_facebook_post(n=100):
    for client in clients:
        posts = []
        with open('../data/{}_fb_posts.json'.format(client), 'w') as json_file:
            for post in get_posts(client, pages=n):
                posts.append(post)
            json.dump(posts, json_file, default=str)

def test_get_group_posts(self):
    text = (
        'Hola!, This group is aimed to create opportunities for South '
        'American students in Computer Science and related fields.\n\n'
        'Hope this will help us to know what we are doing in our work, '
        'achievements to be recognized, increase fairness in our area, and '
        'maybe conferences where we might meet.\n\n'
        'Professors and professionals are also welcomed to share their '
        'experiences and to collaborate among us and learn together.\n\n'
        'Some short rules for a happy co-existence:\n'
        '1. No business advertisement or spam.\n'
        '2. Topics relevant to Computing, Computer Science, Software '
        'Engineering, and Education.\n'
        '3. Political and religious advertisement are not allowed.')
    expected_post = {
        'comments': 1,
        'image': None,
        'video': None,
        'video_thumbnail': None,
        'likes': 26,
        'link': None,
        'post_id': None,
        'post_text': text,
        'post_url': None,
        'shared_text': '',
        'shares': 0,
        'text': text,
        'time': datetime.datetime(2018, 4, 3, 20, 2, 0),
    }
    post = next(get_posts(group=117507531664134))
    assert post == expected_post

def recent_posts(save=False):
    # REPLACED WITH NEW FUNCTION pull_all_posts()
    # The save argument does not currently work - keep it as False.
    print("Getting most recent posts from the group...")
    i = 0
    data = {'postsData': []}
    for post in fs.get_posts(group='670932227050506/?sorting_setting=CHRONOLOGICAL'):
        if post['post_id'] is None:
            print(f'  Post ID reads as None - cannot save post {i}')
        else:
            data['postsData'].append(post)
            i += 1
    print(f"Retrieved {i} posts from the group.")
    if save is True:
        print(f"Attempting to save the {i} retrieved posts to file: {new_data_file}")
        with open(new_data_file, 'w') as savefile:
            try:
                json.dump(data['postsData'], savefile)
                print('Saved new posts successfully')
            except Exception:
                print(f'Unable to save new posts to {new_data_file}')
    print(type(data['postsData']))
    return data['postsData']

def add_pages():
    with open('pages.csv', 'a') as f:
        csv_writer = csv.DictWriter(f, fields)
        c = 'y'
        while c != 'n':
            row = {}
            print("Give me a facebook page id:")
            row['page_tag'] = input()
            print("What name do you want to associate with this page?:")
            row['page_name'] = input()
            print("Please wait while retrieving page info...")
            posts = list(get_posts(row['page_tag'], pages=3, cookies='cookies.txt'))
            if len(posts) > 1:
                if int(posts[0]['post_id']) > int(posts[1]['post_id']):
                    row['last_post_used'] = posts[0]['post_id']
                else:
                    # The first post is a pinned one.
                    row['last_post_used'] = posts[1]['post_id']
            elif len(posts) == 1:
                row['last_post_used'] = posts[0]['post_id']
            else:
                row['last_post_used'] = 0
            csv_writer.writerow(row)
            print("...done.")
            print("Do you want to add another page? [y/n]:")
            while True:
                c = input()
                if c not in ('y', 'n'):
                    print("Invalid input, please enter 'y' or 'n'")
                else:
                    break
    print('The bot is now configured, you can start it with ./scraper.py')

def getPosts(pages2):
    posts = ' '
    dataClean = []
    for page in pages2:
        for post in get_posts(page, pages=20):
            if confimTime(post['time']):
                if post['text'] != ' ':
                    print(post['time'])
                    try:
                        # Strip markup, digits, Latin letters, and punctuation.
                        cleaned = re.sub(r"<.*?>", ' ', post['text'])
                        cleaned = re.sub(r'[0-9]', ' ', cleaned)
                        # Note: this also strips the spaces inserted above.
                        cleaned = re.sub(r' ', '', cleaned)
                        cleaned = re.sub(r'[a-zA-Z]', ' ', cleaned)
                        cleaned = re.sub(r'\n', ' ', cleaned)
                        cleaned = re.sub(r'[!.&é"(-èê_çà)=$*ù^ù:;?#؟]', ' ', cleaned)
                        posts = posts + cleaned
                    except Exception:
                        continue
    text = re.findall(r'\w+', posts)
    # Remove stop words.
    for word in text:
        if word not in stop_words:
            dataClean.append(word)
    r = pd.DataFrame({'data': dataClean})
    r.to_csv("dataPosts.csv", index=False, encoding='utf-8-sig')

def scrape_page(self, page, pages=1):
    self.page = page
    posts = get_posts(page, pages=pages)
    for e in posts:
        # Rate limit to avoid Facebook blocking us.
        sleep(30)
        self.download_post(e)

def get_facebook_posts():
    name = request.args['name']
    pageCount = int(request.args['pageCount'])
    posts = list(get_posts(name, pages=pageCount))
    return jsonify(posts)

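# A hedged client-side sketch (not from the original source): querying the
# Flask handler above with the requests library. The host, port, and route
# '/facebook/posts' are assumptions; only the 'name' and 'pageCount' query
# parameters come from the handler itself.
import requests

resp = requests.get('http://localhost:5000/facebook/posts',
                    params={'name': 'Nintendo', 'pageCount': 2})
resp.raise_for_status()
for post in resp.json():
    print(post.get('post_url'))
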
def update_memes(fanpages: list, num_pages: int):
    """
    Looks at FB fanpages and possibly adds new memes.

    :param fanpages: List of fanpages.
    :param num_pages: Number of pages of a single fanpage to look at.
    """
    count = 0
    new_count = 0
    try:
        memes = Meme.objects.all()
        for fanpage in fanpages:
            for post in get_posts(fanpage, pages=num_pages):
                count += 1
                post_url = post['post_url']
                post_id = post['post_id']
                image_url = post['image']
                if post_url and post_id and image_url:
                    if not memes.filter(post_id=post_id).exists():
                        new_meme = Meme(post_id=post_id, post_url=post_url,
                                        image_url=image_url, views=0, valid=True)
                        new_count += 1
                        new_meme.save()
        logging.info(f'Looked at {count} posts, added {new_count} new memes')
    except Exception:
        logging.exception("Exception occurred")

def check():
    with open('fbpages.csv', mode='r+') as csv_file, \
            NamedTemporaryFile(mode='w', delete=False) as tempfile:
        csv_reader = csv.DictReader(csv_file)
        csv_writer = csv.DictWriter(tempfile, fields)
        csv_writer.writeheader()
        for page in csv_reader:
            temp = None
            posts = list(get_posts(page['fbpage_tag'], pages=2))
            posts.sort(key=lambda x: x['time'])  # oldest first
            for post in posts:
                last_sent = datetime.strptime(page['last_post_date'], date_format)
                if post['time'] <= last_sent:
                    # Post already sent to the channel; skip it. (The list is
                    # sorted oldest-first, so breaking here would also drop
                    # the newer posts that follow.)
                    continue
                if post['image'] is not None:
                    bot.send_photo(chat_id, post['image'],
                                   (post['text'] if post['text'] else '') +
                                   '\n[' + page['telegram_name'] + ']')
                    # Ascending order: temp ends at the newest sent post.
                    temp = post['time']
                elif post['text'] is not None:
                    bot.send_message(chat_id,
                                     (post['text'] if post['text'] else '') +
                                     '\n[' + page['telegram_name'] + ']')
                    temp = post['time']
            if temp is not None:
                page['last_post_date'] = temp
            row = {'telegram_name': page['telegram_name'],
                   'fbpage_tag': page['fbpage_tag'],
                   'last_post_date': page['last_post_date']}
            csv_writer.writerow(row)
    shutil.move(tempfile.name, 'fbpages.csv')
    threading.Timer(WAIT_SECONDS, check).start()

def get_page_posts():
    json_posts = {}
    # Run facebook-scraper.
    posts = get_posts(username, pages=3)
    for i, post in enumerate(posts):
        json_posts[i] = post
        # Convert the datetime object to a string.
        if json_posts[i]['time']:
            json_posts[i]['time'] = str(json_posts[i]['time'])
    # Store the posts data in a JSON file.
    try:
        with open(result_dir / (username + "-posts-fb-page.json"), 'w+',
                  encoding='utf-8') as jsonf:
            jsonf.write(json.dumps(json_posts, indent=2))
    except Exception:
        logger.exception("Exception occurred")
        print(json.dumps({
            "ERROR": "Error occurred while storing Facebook page posts data, "
                     "see logs for more details."
        }))

def scrape(self):
    # credentials = ("*****@*****.**", "TempPassword123")
    # These lists will contain post-related values.
    translated_posts = []
    user_id_list = []
    # These lists will contain NLP-related values.
    bow_list = []
    score_list = []
    for post in get_posts(group=self.group, pages=10):
        text = clean_post(post['text'])
        user_id = str(post['user_id'])
        if text != "" and user_id != "None":
            time.sleep(0.2)
            translated_posts.append(self._translator.translate(text))
            user_id_list.append(user_id)
    for translated_post in translated_posts:
        bow_list.append(NPL_Engine.make_bag_of_words(translated_post))
        score_list.append(
            NPL_Engine.calculate_bow_relation_to_cluster(bow_list[-1]))
    for score, facebook_id in zip(score_list, user_id_list):
        if score > NPL_Engine.HAVER_THRESHOLD:
            new_haver = Haver(facebook_id)
            self._handler.add_haver_to_db(new_haver)
    self._handler.commit()
    return user_id_list

def text_handler(self, update, context):
    """
    Handles raw text messages, e.g.:
    https://www.facebook.com/pagename/adss
    https://m.facebook.com/pagename/adss

    :param update:
    :param context:
    :return:
    """
    self.start_time = time.time()
    data = list(self.extract_message(update))
    text = update.message.text
    if 'facebook.com' in text:
        try:
            page = text.split('facebook.com')[1].split('/')[1]
            # Probe the page to confirm it can be scraped.
            list(get_posts(page, pages=1))
            data.append(page)
            Add.command_add(update, context, *tuple(data))
            return
        except Exception:
            update.message.reply_text("Invalid Link or Can't Add That Page.")
            return
    elif text == '.status':
        update.message.reply_text(
            f"{BOT_ID}\n{(time.time() - self.start_time) // 60} Minutes.")
        return
    update.message.reply_text("Unrecognized Text")

def setup():
    print("This is the setup process for the bot, please be ready with the info required:")
    print("- Telegram Bot Token can be found in your chat with @BotFather")
    print("- The tag of the channel where the bot is admin")
    print("- The ids of the facebook pages you are interested in (can be found in the url "
          "https://www.facebook.com/" + '\033[1m' + "<page_id>" + '\033[0m' + ")")
    print("- Cookies to connect to facebook, you can find instructions in the README.md file")
    print("\nDo you want to proceed? [Y/n]")
    proceed = input()
    if proceed == 'n':
        exit()
    with open('config.py', 'w') as f:
        print("Enter the Token:")
        TOKEN = input()
        print("Enter the chat/channel tag:")
        chat_id = input()
        if chat_id[0] == '@':
            chat_id = chat_id[1:]
        f.writelines([
            "TOKEN = '" + TOKEN + "'" + '\n',
            "chat_id = '@" + chat_id + "'\n"
        ])
    with open('pages.csv', 'w') as f:
        csv_writer = csv.DictWriter(f, fields)
        csv_writer.writeheader()
        c = 'y'
        while c != 'n':
            row = {}
            print("Give me a facebook page id:")
            row['page_tag'] = input()
            print("What name do you want to associate with this page?:")
            row['page_name'] = input()
            print("Please wait while retrieving page info...")
            posts = list(get_posts(row['page_tag'], pages=3, cookies='cookies.txt'))
            if len(posts) > 1:
                if int(posts[0]['post_id']) > int(posts[1]['post_id']):
                    row['last_post_used'] = posts[0]['post_id']
                else:
                    # The first post is a pinned one.
                    row['last_post_used'] = posts[1]['post_id']
            elif len(posts) == 1:
                row['last_post_used'] = posts[0]['post_id']
            else:
                row['last_post_used'] = 0
            csv_writer.writerow(row)
            print("...done.")
            print("Do you want to add another page? [y/n]:")
            while True:
                c = input()
                if c not in ('y', 'n'):
                    print("Invalid input, please enter 'y' or 'n'")
                else:
                    break
    print('The bot is now configured, you can start it with ./scraper.py')

def getNewPosts(self):
    """Scrape the Facebook page for new posts, and filter out previously seen posts"""
    CCTwitterBot.log('Retrieving new posts')
    posts = fb.get_posts(self.pageName, pages=5)
    posts = [p for p in posts if p['post_id'] not in self.state]
    CCTwitterBot.log('Successfully retrieved {} new posts'.format(len(posts)))
    return posts

def getPost(page_name, key, enable_emoji=True):
    for post in get_posts(page_name):
        if key in post['text']:  # e.g. looking for the clock emoji
            if enable_emoji:
                return post['text']
            return remove_emoji(post['text'])
    return None

def scrape(n):
    for post in get_posts(n, pages=1):
        fb_post_url = post['post_url']
        # check against FB_list.csv if df[Name]
        print(fb_post_url)
        sleep(10)
        break

def scrap(i):
    for posts in get_posts('alcheringaiitg', pages=i):
        inst = socialMessage()
        inst.message = posts['text']
        inst.imglink = posts['image']
        inst.save()
    print('Scraping done!')