예제 #1
0
def get_aggregated_feed(_id):
    """
    Fetch the feed of one page, normalise it and cache it as JSON.

    Input: the id of the page; it is passed to ``get_feed`` and stored on
        every post under the ``'source'`` key.
    Output: the ``'id'`` of the first post after sorting by
        ``'created_time'`` in descending order.

    Side effect: the processed list of posts is written to
    ``<_id>.json`` in the current working directory.

    Raises IndexError when the feed contains no posts (the JSON file has
    already been written at that point).
    """
    def created_at(post):
        # NOTE(review): ``parse`` is presumably a date-string parser
        # (e.g. dateutil) - confirm against the file's imports.
        return parse(post['created_time'])

    page_data = get_feed(_id)
    for post in page_data:
        post['source'] = _id

    # Sorted before de-duplication as in the original flow, in case
    # ``remove_duplicates`` is order-sensitive.
    page_data.sort(key=created_at, reverse=True)
    page_data = prettify_date(page_data)
    for post in page_data:
        # A post without text gets an empty message so that code sending
        # the message later does not fail on a missing key.
        post.setdefault('message', "")
    page_data = remove_duplicates(page_data)
    page_data.sort(key=created_at, reverse=True)

    # The context manager guarantees the file is flushed and closed even
    # if serialisation fails part-way through.
    with open('{}.json'.format(_id), 'w') as json_file:
        json.dump(page_data, json_file)
    return page_data[0]['id']
예제 #2
0
def get_aggregated_feed(_id, log):
    """
    Fetch the feed of one page, linkify its messages and cache it as JSON.

    Input:
        _id: id of the page; passed to ``get_feed`` and stored on every
            post under the ``'source'`` key.
        log: logger object; it is installed as the module-level ``logger``.
    Output: the ``'id'`` of the first post after sorting by
        ``'created_time'`` in descending order.

    Side effect: the processed list of posts is written to
    ``FB/page_json/<_id>.json``.

    Raises IndexError when the feed contains no posts (the JSON file has
    already been written at that point).
    """
    global logger
    logger = log

    def created_at(post):
        # NOTE(review): ``parse`` is presumably a date-string parser
        # (e.g. dateutil) - confirm against the file's imports.
        return parse(post['created_time'])

    page_data = get_feed(_id)
    for post in page_data:
        post['source'] = _id

    # Sorted before de-duplication as in the original flow, in case
    # ``remove_duplicates`` is order-sensitive.
    page_data.sort(key=created_at, reverse=True)
    page_data = prettify_date(page_data)
    parser = commonregex.CommonRegex()
    for post in page_data:
        if 'message' not in post:
            # A post without text gets an empty message so that code
            # sending the message later does not fail on a missing key.
            post['message'] = ""
        elif 'flag' not in post:
            # 'flag' marks posts whose message has already been linkified,
            # so the same message is never wrapped in anchors twice.
            post['message'] = enable_links(post['message'], parser)
            post['flag'] = 1
    page_data = remove_duplicates(page_data)
    page_data.sort(key=created_at, reverse=True)

    # The context manager guarantees the file is flushed and closed even
    # if serialisation fails part-way through.
    with open('FB/page_json/{}.json'.format(_id), 'w') as json_file:
        json.dump(page_data, json_file)
    return page_data[0]['id']
예제 #3
0
def validate_args(arg):
    """Validate command line arguments.

    Args:
        arg: Object exposing ``domain`` and ``record`` attributes; both
            values are converted with ``str()`` before being checked.

    Raises:
        TypeError: If ``domain`` is empty or contains anything other than
            the letters a-z (case-insensitive), digits, ``-`` or ``.``;
            or if ``record`` is empty or contains anything other than the
            letters a-z (case-insensitive), digits or ``-``.
    """
    # Raw strings keep the regex escapes (\A, \., \Z) away from Python's
    # own string-escape processing, which warns on unknown escapes.
    # (?ix) = case-insensitive + verbose mode, so the spaces inside the
    # patterns are layout only and are not matched.
    if not re.search(r'(?ix) \A [a-z0-9-\.]+ \Z', str(arg.domain)):
        raise TypeError("Invalid domain name.")

    if not re.search(r'(?ix) \A [a-z0-9-]+ \Z', str(arg.record)):
        raise TypeError("Invalid record.")
예제 #4
0
def enable_links(message):
    """Wrap URL-like substrings of ``message`` in HTML anchor tags.

    Candidates come from ``commonregex``'s link matcher and are kept only
    if they contain one of a few URL hints ("www", "http", ".com", ...).
    For each kept link the first occurrence in the message is replaced by
    an ``<a ... target="_blank">`` element; the visible text is cut to 25
    characters plus "..." when the link is 25 characters or longer.

    Args:
        message: Text to linkify.

    Returns:
        The message with the recognised links replaced by anchors.
    """
    parser = commonregex.CommonRegex()
    url_identifier = ("www", "http", "bit.ly", ".com", ".co.in")
    anchor = ' <a href="{}" target="_blank"> {} </a> '

    # dict.fromkeys drops duplicate matches; on Python 3.7+ it also keeps
    # them in first-seen order, so the result no longer depends on set
    # iteration order.
    for link in dict.fromkeys(parser.links(message)):
        # Skip (rather than stop at) a match that does not look like a
        # URL, so later genuine links are still converted.
        if not any(keyword in link for keyword in url_identifier):
            continue

        if link.startswith('http'):
            http_link = link
        else:
            http_link = "http://{}".format(link)

        label = link if len(link) < 25 else link[0:25] + "..."
        # NOTE(review): the link is inserted into the HTML unescaped; if
        # the message comes from untrusted input, confirm it is sanitised
        # before rendering.
        message = message.replace(link, anchor.format(http_link, label), 1)
    return message
예제 #5
0
import json
from django.utils.encoding import smart_str
from jinja2 import Template
import commonregex

# Module-level CommonRegex instance, created once when the module is imported.
# NOTE(review): none of the functions visible in this part of the file use
# it - confirm it is still needed.
parser = commonregex.CommonRegex()


def fixnewlines(message):
    """Return ``message`` with every newline turned into ' <br> '."""
    lines = message.split('\n')
    return ' <br> '.join(lines)


# def shortify_string(message):

#     message = message.split(" ")
#     new_message = ""
#     for mess in message :
#         if len(mess) > 25 :
#             mess = " <a href='" + mess + "' target='_blank'> " + mess[0:25] + "... </a> "
#         new_message = new_message + mess + " "

#     return new_message


def truncate(message, length):
    """Cut ``message`` at the first space found at or after ``length``.

    Args:
        message: Text to shorten.
        length: Minimum number of characters to keep; the cut happens at
            the first space whose index is >= ``length``, so a word is
            never split. Negative values are treated as 0.

    Returns:
        The prefix of ``message`` up to (not including) that space, or the
        whole message unchanged when there is no such space - including
        the case where the message is already shorter than ``length``.
    """
    # Clamp so a negative length cannot wrap around to the end of the
    # string the way negative indices do.
    cut = message.find(" ", max(length, 0))
    if cut == -1:
        # Nothing to cut at: keep the full text (no dropped last character).
        return message
    return message[:cut]