def get_aggregated_feed(_id):
    """Return the id of the newest post in the feed of page ``_id``.

    The feed is fetched with ``get_feed``, every post is tagged with its
    source page id, posts without a ``'message'`` key get an empty one, and
    duplicates are dropped via ``remove_duplicates``.

    Args:
        _id: Identifier of the page whose feed is fetched.

    Returns:
        The ``'id'`` value of the first post once the feed is ordered by
        ``'created_time'``, newest first.

    Raises:
        IndexError: If the feed is empty after de-duplication (assuming
            ``remove_duplicates`` returns a list-like object).
    """
    def created_at(post):
        # `parse` is presumably dateutil's date parser -- confirm.
        return parse(post['created_time'])

    page_data = get_feed(_id)
    for post in page_data:
        post['source'] = _id

    page_data.sort(key=created_at, reverse=True)
    page_data = prettify_date(page_data)

    for post in page_data:
        # Posts without text get an empty message so that later consumers
        # of post['message'] do not fail on a missing key.
        post.setdefault('message', "")

    page_data = remove_duplicates(page_data)
    # Sorted again because the helpers above are opaque from here and may
    # not preserve the ordering.
    page_data.sort(key=created_at, reverse=True)
    return page_data[0]['id']
def get_aggregated_feed(_id, log):
    """Fetch the feed of page ``_id``, normalise its posts, return the top id.

    Side effect: rebinds the module-level ``logger`` to ``log``.

    Each post is tagged with ``'source'``. Posts that carry a message and
    have not been processed before (no ``'flag'`` key) have their message
    passed through ``enable_links`` and are then flagged; posts without a
    message receive an empty one. Duplicates are removed and the feed is
    ordered by ``'created_time'``, newest first.

    Returns the ``'id'`` of the first post of the resulting feed.
    """
    global logger
    logger = log

    def _created_at(item):
        return parse(item['created_time'])

    posts = get_feed(_id)
    for item in posts:
        item['source'] = _id

    posts.sort(key=_created_at, reverse=True)
    posts = prettify_date(posts)

    link_parser = commonregex.CommonRegex()
    for item in posts:
        if 'message' not in item:
            # Placeholder text so that sending a text-less post does not fail.
            item['message'] = ""
        elif 'flag' not in item:
            # The flag marks a post as already linkified, so it is done once.
            item['message'] = enable_links(item['message'], link_parser)
            item['flag'] = 1

    posts = remove_duplicates(posts)
    posts.sort(key=_created_at, reverse=True)
    return posts[0]['id']
def validate_args(arg):
    """Validate command line arguments.

    Args:
        arg: Object exposing ``domain`` and ``record`` attributes (for
            example an argparse namespace). Both values are converted with
            ``str()`` before being checked.

    Returns:
        None when both values are acceptable.

    Raises:
        TypeError: If ``domain`` contains anything other than ASCII
            letters, digits, ``-`` or ``.``, or if ``record`` contains
            anything other than ASCII letters, digits or ``-``. Empty
            values are rejected as well.
    """
    # Raw strings keep \A, \. and \Z as regex escapes instead of relying on
    # Python passing unknown string escapes through unchanged.
    # (?ix): case-insensitive, verbose (whitespace in the pattern is ignored).
    if not re.search(r'(?ix) \A [a-z0-9-\.]+ \Z', str(arg.domain)):
        raise TypeError("Invalid domain name.")
    if not re.search(r'(?ix) \A [a-z0-9-]+ \Z', str(arg.record)):
        raise TypeError("Invalid record.")
def enable_links(message, parser=None):
    """Wrap URL-like substrings of ``message`` in HTML anchor tags.

    Args:
        message: Text to process. Falsy values are returned unchanged.
        parser: Optional object exposing ``links(text)`` that returns the
            candidate link strings. When omitted, a new
            ``commonregex.CommonRegex()`` is created.

    Returns:
        ``message`` with the first occurrence of every distinct accepted
        link replaced by ``<a href=... target="_blank">``. Link text longer
        than 25 characters is shortened to 25 characters plus ``...``; the
        ``href`` always carries the full link, prefixed with ``http://``
        when the link does not already start with ``http``.

    NOTE(review): neither the message nor the link is HTML-escaped here;
    confirm that untrusted text is escaped elsewhere before rendering.
    """
    if not message:
        return message
    if parser is None:
        parser = commonregex.CommonRegex()

    # A candidate must contain one of these markers to count as a URL.
    url_identifier = ("www", "http", "bit.ly", ".com", ".co.in")
    anchor = " <a href=\"{}\" target=\"_blank\"> {} </a> "
    max_label = 25

    seen = set()
    for link in parser.links(message):
        # De-duplicate while keeping first-seen order so the output is
        # deterministic.
        if link in seen:
            continue
        seen.add(link)

        # Skip non-URL candidates but keep processing the remaining ones.
        if not any(keyword in link for keyword in url_identifier):
            continue

        if link.startswith('http'):
            http_link = link
        else:
            http_link = "http://{}".format(link)

        if len(link) <= max_label:
            label = link
        else:
            label = link[:max_label] + "..."

        message = message.replace(link, anchor.format(http_link, label), 1)
    return message
import json

from django.utils.encoding import smart_str
from jinja2 import Template
import commonregex

parser = commonregex.CommonRegex()


def fixnewlines(message):
    """Return ``message`` with every newline replaced by ``' <br> '``."""
    return message.replace('\n', ' <br> ')


# Disabled legacy helper, kept for reference:
# def shortify_string(message):
#     message = message.split(" ")
#     new_message = ""
#     for mess in message :
#         if len(mess) > 25 :
#             mess = " <a href='" + mess + "' target='_blank'> " + mess[0:25] + "... </a> "
#         new_message = new_message + mess + " "
#     return new_message


def truncate(message, length):
    """Cut ``message`` at the first space found at or after index ``length``.

    Args:
        message: Text to shorten.
        length: Index from which the search for a space starts.

    Returns:
        The part of ``message`` before that space. When there is no space
        at or after ``length`` (including when the message is shorter than
        ``length``) the whole message is returned unchanged.
    """
    cut = message.find(" ", length)
    if cut == -1:
        # Nothing to cut at: return the text intact rather than dropping
        # its last character.
        return message
    return message[:cut]