Example #1
0
def main():
	"""
	Main loop of the process
	"""
	# timeout in seconds
	timeout = 10
	socket.setdefaulttimeout(timeout)

	# Delete old entries
	update_cursor = DBM.cursor('update')
	query = """
		DELETE FROM rss
			WHERE date < date_sub(now(), interval %s day)
	"""
	update_cursor.execute(query, (dbconf.blogs['days_to_keep'],))
	DBM.commit()
	update_cursor.close()

	users = set()
	news = set()
	blogs = get_candidate_blogs(dbconf.blogs['days_published'],
								dbconf.blogs['min_karma'])
	for blog in blogs:
		entries = blog.read_feed()
		time.sleep(3)
		if entries > 0:
			users.add(blog.user)
			news.add(blog)


	if dbconf.blogs['post_user'] and dbconf.blogs['post_key'] and users:
		post = _('Nuevo apunte en el blog de: ')
		for note in news:
			post += "@" + note.user
			for link in note.links:
				post += " " + link
			post += "\n"

		post += '\nhttp://'+dbconf.domain+dbconf.blogs['viewer']+" #blogs"
		print post
		try:
			url = """
				http://{d}{newpost}?user={post_user}&key={post_key}&text={t}
			""".format(d= dbconf.domain,
						t= urllib.quote_plus(post),
						**dbconf.blogs)
			## TODO: Use timeout parameter instead of
			##       socket.setdefaulttimeout(timeout)
			urlpost = urllib2.urlopen(url)
			print urlpost.read(100)
			urlpost.close()
		except KeyError:
			print "Error posting", url
			pass
Example #2
0
def main():
	"""
	Main loop of the process
	"""
	# timeout in seconds
	timeout = 10
	socket.setdefaulttimeout(timeout)

	# Delete old entries
	update_cursor = DBM.cursor('update')
	query = """
		DELETE FROM rss
			WHERE date < date_sub(now(), interval %s day)
	"""
	update_cursor.execute(query, (dbconf.blogs['days_to_keep'],))
	DBM.commit()
	update_cursor.close()

	users = set()
	news = set()
	blogs = get_candidate_blogs(dbconf.blogs['days_published'],
								dbconf.blogs['min_karma'])
	for blog in blogs:
		entries = blog.read_feed()
		time.sleep(3)
		if entries > 0:
			users.add(blog.user)
			news.add(blog)


	if dbconf.blogs['post_user'] and dbconf.blogs['post_key'] and users:
		post = _('Nuevo apunte en el blog de: ')
		for note in news:
			post += "@" + note.user
			for link in note.links:
				post += " " + link
			post += "\n"

		post += '\nhttp://'+dbconf.domain+dbconf.blogs['viewer']+" #blogs"
		print post
		try:
			url = """
				http://{d}{newpost}?user={post_user}&key={post_key}&text={t}
			""".format(d= dbconf.domain,
						t= urllib.quote_plus(post),
						**dbconf.blogs)
			## TODO: Use timeout parameter instead of
			##       socket.setdefaulttimeout(timeout)
			urlpost = urllib2.urlopen(url)
			print urlpost.read(100)
			urlpost.close()
		except KeyError:
			print "Error posting", url
			pass
Example #3
0
def do_site(site):
	""" Process a given site """
	links = {}
	cursor = DBM.cursor()
	query = """
		select link_id, link_uri,
			unix_timestamp(now()) - unix_timestamp(link_date)
		from links, subs, sub_statuses
		where subs.name = %s
			and subs.id = sub_statuses.id
			and status = 'published'
			and date > date_sub(now(), interval 24 hour)
			and link = link_id
			and link_votes/20 > link_negatives
		order by link_date desc
	"""
	cursor.execute(query, (site,))
	links_total = 0
	for link_id, link_uri, old in cursor:
		links_total += 1
		values = {}
		values['uri'] = link_uri
		# How old in seconds
		values['old'] = old
		values['w'] = 0
		values['c'] = 0
		values['v'] = 0
		values['links_order'] = links_total
		links[link_id] = values

	if not links_total:
		return

	links_format = ','.join(['%s'] * len(links))
	query = """
		select vote_link_id,
			sum((1-(unix_timestamp(now())
					- unix_timestamp(vote_date))/36000)) as x,
			count(*)
		from votes
		where vote_link_id in (%s)
			and vote_type='links'
			and vote_date > date_sub(now(), interval 12 hour)
			and vote_user_id > 0
			and vote_value > 6.1
		group by vote_link_id
		order by x desc
	"""  % links_format
	cursor.execute(query, tuple(links))
	votes_total = 0
	votes_links = 0
	v_total = 0
	v_list = {}
	for link_id, old, votes in cursor:
		votes_links += 1
		votes_old = float(old)
		links[link_id]['v'] = votes_old
		v_total += votes_old
		v_list[link_id] = votes_old
		links[link_id]['votes'] = votes
		votes_total += votes
		links[link_id]['votes_order'] = votes_links

	if not votes_links:
		return

	v_average = v_total/votes_links
	votes_average = votes_total/votes_links

	query = """
		select comment_link_id,
			sum(1.5*(1-(unix_timestamp(now())
						- unix_timestamp(comment_date))/36000)),
			count(*)
		from comments
		where comment_link_id in (%s)
			and comment_date > date_sub(now(), interval 12 hour)
		group by comment_link_id
	""" % links_format
	cursor.execute(query, tuple(links))
	comments_total = 0
	comments_links = 0
	c_total = 0
	c_list = {}
	for link_id, old, count in cursor:
		comment_old = float(old)
		comments_links += 1
		links[link_id]['c'] = comment_old
		c_total += comment_old
		c_list[link_id] = comment_old
		links[link_id]['comments'] = count
		comments_total += count

	if not comments_links:
		return

	c_average = c_total/comments_links
	comments_average = comments_total/comments_links
	query = """
		select id, counter from link_clicks where id in (%s)
	""" % links_format

	cursor.execute(query, tuple(links))
	for link_id, clicks in cursor:
		links[link_id]['clicks'] = clicks

	cursor.close()

	print "Site:", site, "Votes average:", votes_average, v_average, \
			"Comments average:", comments_average, c_average

	for link_id, link_value in links.items():
		if link_value['c'] > 0 \
				and link_value['v'] > 0 \
				and 'clicks' in link_value:
			links[link_id]['w'] = (1 - link_value['old']/(1.5*86400)) \
						   * (link_value['v'] \
						   + link_value['c'] \
						   + link_value['clicks'] \
						   * (1 - link_value['old']/86400) * 0.01)

	sorted_ids = sorted(links, cmp=lambda x, y:
											cmp(links[y]['w'], links[x]['w']))

	if sorted_ids:
		annotations = ','.join([unicode(x) for x in sorted_ids[:10]])
		cursor_update = DBM.cursor('update')
		query = """
			replace into annotations
				(annotation_key, annotation_expire, annotation_text)
				values (%s, date_add(now(), interval 15 minute), %s)
		"""
		cursor_update.execute(query, ('top-actives-'+site, annotations))
		cursor_update.close()
		DBM.commit()

	i = 0
	for key in sorted_ids:
		if links[key]['w'] > 0 and i < 10:
			i += 1


	# Select the top stories
	annotations = ','.join([unicode(x) for x in sorted_ids
						if links[x]['w'] > dbconf.tops['min-weight']
							and (links[x]['links_order'] > 1
							or links[x]['old'] > 3600)
							and links[x]['c'] > c_avrg(c_list, x) * 4
							and links[x]['v'] > c_avrg(v_list, x) * 4
							and links[x]['votes_order'] <= 10 ])

	print "SELECT: ", site, annotations

	if annotations:
		cursor_update = DBM.cursor('update')
		query = """
			replace into annotations
				(annotation_key, annotation_expire, annotation_text)
				values (%s, date_add(now(), interval 10 minute), %s)
		"""
		cursor_update.execute(query, ('top-link-'+site, annotations))
		cursor_update.close()
		DBM.commit()
		print "Stored:", annotations
	else:
		print "No one selected"
def main():
    """
	Main loop of the process
	"""
    # timeout in seconds
    timeout = 10
    socket.setdefaulttimeout(timeout)

    print "------------------------------ BEGIN FEEDS UPDATE -", time.strftime(
        "%c"), "UTC ------------------------------"

    # Delete old entries
    update_cursor = DBM.cursor('update')
    query = """
		DELETE FROM rss
			WHERE date_parsed < date_sub(now(), interval %s day)
	"""
    print "Deleting old entries"
    update_cursor.execute(query, (dbconf.blogs['days_to_keep'], ))
    DBM.commit()
    update_cursor.close()
    """
	Get the possible blog we can read
	"""
    now = time.time()
    cursor = DBM.cursor()

    query = """
		SELECT blog_id, blog_url, blog_feed,
				UNIX_TIMESTAMP(blog_feed_checked),
				UNIX_TIMESTAMP(blog_feed_read)
			FROM sub_statuses, links, blogs
			WHERE 
				(id = 1
				AND status = "published" AND date > date_sub(now(), interval %s day)
				AND link_id = link
				AND blog_id = link_blog
				AND blog_feed_checked is not null
				AND blog_type <> 'disabled'
				AND blog_feed is not null)
		UNION
		SELECT blog_id, blog_url, blog_feed,
                                UNIX_TIMESTAMP(blog_feed_checked),
                                UNIX_TIMESTAMP(blog_feed_read)
			FROM blogs
			WHERE blog_type = 'aggregator'
		GROUP BY blog_id
	"""
    feeds_read = 0
    print "Reading feeds..."
    cursor.execute(query, (dbconf.blogs['days_blogs'], ))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, blog.checked, blog.read = row
        blog.user_id = 0
        blog.base_url = blog.url.replace('http://',
                                         '').replace('https://',
                                                     '').replace('www.', '')
        if blog.is_banned():
            continue
        print " >>> Reading: %s (%s)" % (blog.url, blog.feed)
        entries = blog.read_feed()
        print "     Blog ", blog.id, " has ", entries, " entries %s" % blog.url
        feeds_read += 1

    cursor.close()

    print "------------------------------ END - ", feeds_read, " feeds read - ", time.strftime(
        "%c"), "UTC ------------------------------"
def main():
	"""
	Main loop of the process
	"""
	# timeout in seconds
	timeout = 10
	socket.setdefaulttimeout(timeout)

	print "------------------------------ BEGIN FEEDS UPDATE -", time.strftime("%c"), "UTC ------------------------------"

	# Delete old entries
	update_cursor = DBM.cursor('update')
	query = """
		DELETE FROM rss
			WHERE date_parsed < date_sub(now(), interval %s day)
	"""
	print "Deleting old entries"
	update_cursor.execute(query, (dbconf.blogs['days_to_keep'],))
	DBM.commit()
	update_cursor.close()

	"""
	Get the possible blog we can read
	"""
	now = time.time()
	cursor = DBM.cursor()

	query = """
		SELECT blog_id, blog_url, blog_feed,
				UNIX_TIMESTAMP(blog_feed_checked),
				UNIX_TIMESTAMP(blog_feed_read)
			FROM sub_statuses, links, blogs
			WHERE 
				(id = 1
				AND status = "published" AND date > date_sub(now(), interval %s day)
				AND link_id = link
				AND blog_id = link_blog
				AND blog_feed_checked is not null
				AND blog_type <> 'disabled'
				AND blog_feed is not null)
		UNION
		SELECT blog_id, blog_url, blog_feed,
                                UNIX_TIMESTAMP(blog_feed_checked),
                                UNIX_TIMESTAMP(blog_feed_read)
			FROM blogs
			WHERE blog_type = 'aggregator'
		GROUP BY blog_id
	"""
	feeds_read = 0
	print "Reading feeds..."
	cursor.execute(query, (dbconf.blogs['days_blogs'],))
	for row in cursor:
		blog = BaseBlogs()
		blog.id, blog.url, blog.feed, blog.checked, blog.read = row
		blog.user_id = 0
		blog.base_url = blog.url.replace('http://', '').replace('https://', '').replace('www.', '')
		if blog.is_banned():
			continue
		print " >>> Reading: %s (%s)" % (blog.url, blog.feed)
		entries = blog.read_feed()
		print "     Blog ", blog.id, " has ", entries, " entries %s" % blog.url
		feeds_read += 1

	cursor.close()

	print "------------------------------ END - ", feeds_read, " feeds read - ", time.strftime("%c"), "UTC ------------------------------"
Example #6
0
def main():
	global configurations
	activity = {}
	seen_ips = {}

	# Delete old entries
	update_cursor = DBM.cursor('update')
	query = """ DELETE FROM clones WHERE clon_date < date_sub(now(), interval 120 day) """
	update_cursor.execute(query)
	DBM.commit()


	if configuration.hours:
		minutes = configuration.hours * 60
	elif configuration.minutes:
		minutes = configuration.minutes

	print "Analyzing IPs for %d minutes" % minutes
	cursor = DBM.cursor()

	queries = (
		"""select distinct vote_user_id, vote_ip_int from votes where vote_type in ('links', 'comments', 'posts') and vote_user_id != 0 and vote_date > date_sub(now(), interval %s minute)""", 
		"""select distinct comment_user_id, comment_ip_int from comments where comment_date > date_sub(now(), interval %s minute)"""
	)

	for query in queries:
		cursor.execute(query, (minutes,))
		for uid, ip_int in cursor:
			ip = IPAddress(ip_int)
			add_user_ip(uid, ip, activity)
			#print uid, ip_int, ip

	search_from = int(30*24 + (minutes*60));
	print "Analyzing history for %d hours" % search_from

	clones = set()
	ips_counter = {}
	for u, ips in activity.iteritems():
		# To avoid warning of truncated DOUBLE, the list of decimals is passed directly to the mysql driver
		format_strings = ','.join(['%s'] * len(ips))
		query = """select distinct vote_user_id, vote_ip_int from votes where vote_ip_int in (%s) """ % format_strings
		query += """and vote_user_id != %d and vote_user_id > 0 and vote_date > date_sub(now(), interval %d hour)""" % (u, search_from)
		cursor.execute(query, tuple(ips))

		for clon, ip_int in cursor:
			ip = IPAddress(ip_int)
			# print u, clon, ip
			clones.add((u, clon, ip))
			subnet = IPSubnet(ip)
			if subnet not in ips_counter:
				ips_counter[subnet] = 1
			else:
				ips_counter[subnet] += 1

	#print clones, ips_counter

	c = 0
	for u, clon, ip in clones:
		subnet = IPSubnet(ip)
		if ips_counter[subnet] < 30:
			print "Clon:", u, clon, ip, ips_counter[subnet]
			insert = """REPLACE INTO clones (clon_from, clon_to, clon_ip) VALUES (%s, %s, %s)"""
			update_cursor.execute(insert, (u, clon, ip))
			insert = """INSERT IGNORE INTO clones (clon_to, clon_from, clon_ip) VALUES (%s, %s,	%s)"""
			update_cursor.execute(insert, (u, clon, ip))
			c += 1
			if c % 10 == 0:
				DBM.commit()
		else:
			print "Rejected: ", str(ip), subnet, ips_counter[subnet]
	DBM.commit()
Example #7
0
def main():
    global configurations
    activity = {}
    seen_ips = {}

    # Delete old entries
    update_cursor = DBM.cursor('update')
    query = """ DELETE FROM clones WHERE clon_date < date_sub(now(), interval 120 day) """
    update_cursor.execute(query)
    DBM.commit()

    if configuration.hours:
        minutes = configuration.hours * 60
    elif configuration.minutes:
        minutes = configuration.minutes

    print "Analyzing IPs for %d minutes" % minutes
    cursor = DBM.cursor()

    queries = (
        """select distinct vote_user_id, vote_ip_int from votes where vote_type in ('links', 'comments', 'posts') and vote_user_id != 0 and vote_date > date_sub(now(), interval %s minute)""",
        """select distinct comment_user_id, comment_ip_int from comments where comment_date > date_sub(now(), interval %s minute)"""
    )

    for query in queries:
        cursor.execute(query, (minutes, ))
        for uid, ip_int in cursor:
            ip = IPAddress(ip_int)
            add_user_ip(uid, ip, activity)
            #print uid, ip_int, ip

    search_from = int(30 * 24 + (minutes * 60))
    print "Analyzing history for %d hours" % search_from

    clones = set()
    ips_counter = {}
    for u, ips in activity.iteritems():
        # To avoid warning of truncated DOUBLE, the list of decimals is passed directly to the mysql driver
        format_strings = ','.join(['%s'] * len(ips))
        query = """select distinct vote_user_id, vote_ip_int from votes where vote_ip_int in (%s) """ % format_strings
        query += """and vote_user_id != %d and vote_user_id > 0 and vote_date > date_sub(now(), interval %d hour)""" % (
            u, search_from)
        cursor.execute(query, tuple(ips))

        for clon, ip_int in cursor:
            ip = IPAddress(ip_int)
            # print u, clon, ip
            clones.add((u, clon, ip))
            subnet = IPSubnet(ip)
            if subnet not in ips_counter:
                ips_counter[subnet] = 1
            else:
                ips_counter[subnet] += 1

    #print clones, ips_counter

    c = 0
    for u, clon, ip in clones:
        subnet = IPSubnet(ip)
        if ips_counter[subnet] < 30:
            print "Clon:", u, clon, ip, ips_counter[subnet]
            insert = """REPLACE INTO clones (clon_from, clon_to, clon_ip) VALUES (%s, %s, %s)"""
            update_cursor.execute(insert, (u, clon, ip))
            insert = """INSERT IGNORE INTO clones (clon_to, clon_from, clon_ip) VALUES (%s, %s,	%s)"""
            update_cursor.execute(insert, (u, clon, ip))
            c += 1
            if c % 10 == 0:
                DBM.commit()
        else:
            print "Rejected: ", str(ip), subnet, ips_counter[subnet]
    DBM.commit()