Example #1
def main():
	"""
	Main loop of the process
	"""
	# timeout in seconds
	timeout = 10
	socket.setdefaulttimeout(timeout)

	# Delete old entries
	update_cursor = DBM.cursor('update')
	query = """
		DELETE FROM rss
			WHERE date < date_sub(now(), interval %s day)
	"""
	update_cursor.execute(query, (dbconf.blogs['days_to_keep'],))
	DBM.commit()
	update_cursor.close()

	users = set()
	news = set()
	blogs = get_candidate_blogs(dbconf.blogs['days_published'],
								dbconf.blogs['min_karma'])
	for blog in blogs:
		entries = blog.read_feed()
		time.sleep(3)
		if entries > 0:
			users.add(blog.user)
			news.add(blog)


	if dbconf.blogs['post_user'] and dbconf.blogs['post_key'] and users:
		post = _('Nuevo apunte en el blog de: ')
		for note in news:
			post += "@" + note.user
			for link in note.links:
				post += " " + link
			post += "\n"

		post += '\nhttp://'+dbconf.domain+dbconf.blogs['viewer']+" #blogs"
		print post
		url = ''
		try:
			url = ("http://{d}{newpost}?user={post_user}&key={post_key}&text={t}"
					.format(d=dbconf.domain,
							t=urllib.quote_plus(post),
							**dbconf.blogs))
			## TODO: Use timeout parameter instead of
			##       socket.setdefaulttimeout(timeout)
			urlpost = urllib2.urlopen(url)
			print urlpost.read(100)
			urlpost.close()
		except (KeyError, urllib2.URLError):
			print "Error posting", url
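
A tighter way to build the same query string (a sketch, assuming the same dbconf fields used by the format call above) is to let urllib.urlencode do the escaping:

	params = urllib.urlencode({'user': dbconf.blogs['post_user'],
							'key': dbconf.blogs['post_key'],
							'text': post})
	url = 'http://' + dbconf.domain + dbconf.blogs['newpost'] + '?' + params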
Example #2
def store(site, entry):
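    """
    Store or refresh a site's entry in a link's annotation.

    Returns entry['ts'] on success, False if the link does not exist
    or the stored entry is at least as recent.
    """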
    id = entry['id']
    cursor = DBM.cursor()
    cursor.execute("select link_id from links where link_id = %s", (id, ))
    result = cursor.fetchone()
    if not result:
        return False

    annotation = read_annotation(KEY + str(id))
    if not annotation:
        data = {}
    else:
        data = json.loads(annotation)
        if data.get(site):
            if data[site]['ts'] >= entry['ts']:
                return False

            del data[site]

    data[site] = entry
    data = json.dumps(data)
    if data:
        store_annotation(KEY + str(id), data)
        print data
    cursor.close()
    return entry['ts']
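
A minimal usage sketch (the site name and payload are hypothetical; store() only requires the 'id' and 'ts' keys):

    entry = {'id': 42, 'ts': 1400000000}
    if store('twitter', entry):
        print "stored"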
Example #3
def main():
	cursor = DBM.cursor()

	query = """select distinct clon.user_login, clon.user_login_register, users.user_login, users.user_login_register, clon.user_level, clon_ip, clon_date from users, users as clon, clones where clon_from = users.user_id and clon_to = clon.user_id and clon_date > date_sub(now(), interval 60 day)"""

	cursor.execute(query)
	print("%-16s (%-20s)\t%-16s (%-20s)\t%-20s\t%-12s\t%s" % ("clon", "clonreg", "user", "userreg", "ip", "level", "date"))
	print("---------------------------------------------------------------------------------------------------------------------------------------------");
	for clon, clonreg, user, userreg, level, ip, date in cursor:
		print("%-16s (%-20s)\t%-16s (%-20s)\t%-20s\t%-12s\t%s" % (clon, clonreg, user, userreg, ip, level, date))
Example #4
def main():
    global configuration
    user = configuration.user

    cursor = DBM.cursor()

    query = """select distinct clon.user_login, clon.user_level, clon_ip, clon_date from users, users as clon, clones where users.user_login = %s and clon_from = users.user_id and clon_to = clon.user_id and clon_date > date_sub(now(), interval 60 day)"""

    cursor.execute(query, (user, ))
    for clon, level, ip, date in cursor:
        print("%-16s\t%s\t%s\t%s" % (clon, ip, level, date))
Example #5
def main():
	global configuration
	ip = configuration.IP

	cursor = DBM.cursor()

	query = """ SELECT distinct user_login, user_email, user_level, clon_ip FROM users, clones WHERE (clon_ip LIKE %s OR clon_ip LIKE %s) AND (clon_from = user_id OR clon_to = user_id)"""

	cursor.execute(query, ("%s%%" % ip, "COOK:%s%%" % ip))
	for user, email, level, ip in cursor:
		print("%-16s\t%s\t%s\t%s" % (user, email, ip, level))
Example #6
def main():
    global configuration
    user = configuration.user

    cursor = DBM.cursor()

    seen = set()
    query = """select vote_ip_int, vote_date from users, votes where user_login=%s and vote_type in ('links', 'comments', 'posts') and vote_user_id=user_id order by vote_date desc"""

    cursor.execute(query, (user, ))
    c = 0
    for ip_int, date in cursor:
        if ip_int not in seen and ip_int > 0:
            print("%s\t%s" % (ipaddr.IPAddress(long(ip_int)), date))
            seen.add(ip_int)
            c += 1
        if c > 20:
            break
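
For reference, ipaddr.IPAddress accepts the integer form of an address directly, which is what the loop above relies on:

    import ipaddr
    print ipaddr.IPAddress(long(3232235777))  # -> 192.168.1.1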
Example #7
def get_link_average(link_id):
    """ Get the average weight of a link """
    votes = {}
    values_sum = 0
    values_count = 0

    cursor = DBM.cursor()
    query = """
        select vote_user_id, vote_value
            from votes, links
            where vote_type = 'links'
                and vote_link_id = %s
                and vote_user_id > 0
                and vote_value > 0
                and link_id = vote_link_id
                and ( (link_status = 'published'
                    and vote_date < link_date)
                OR link_status != 'published')
    """
    cursor.execute(query, (link_id, ))
    for user_id, vote_value in cursor:
        votes[user_id] = int(vote_value / abs(vote_value))

    # Enumerate each unordered pair of voters exactly once (minor < major),
    # matching the key order of the users_similarities table.
    sorted_users = [(minor, major)
                    for (minor, major) in itertools.product(votes, repeat=2)
                    if major > minor]

    for values_count, (minor, major) in enumerate(sorted_users, start=1):
        query = """
            select value, UNIX_TIMESTAMP(date)
                from users_similarities
                where minor = %s
                    and major = %s
        """
        cursor.execute(query, (minor, major))
        row = cursor.fetchone()
        values_sum += 0 if row is None else row[0]

    print values_sum, values_count
    if not values_count:
        # No voter pairs at all; avoid a ZeroDivisionError
        return 0
    average = values_sum / values_count
    return average
Example #8
def main():
    """ Main loop, processing the top 20 published links"""
    if len(sys.argv) == 2:
        link_id = int(sys.argv[1])
        print get_link_average(link_id)
    else:
        total = 0
        average = 0
        cursor = DBM.cursor()
        query = """
            select link_id
                from links
                where link_status = 'published'
                order by link_date desc
                limit 20"
        """
        cursor.execute(query)
        for total, (link_id,) in enumerate(cursor, start=1):
            average += get_link_average(link_id)

        assert total > 0, "No published links."

        print average / total
Example #9
def get_candidate_blogs(days, min_karma):
	"""
	Get the possible blogs we can read
	"""
	now = time.time()
	blogs = set()
	results = set()
	blogs_ids = set()
	users_ids = set()
	cursor = DBM.cursor()
	inner_cursor = DBM.cursor()

	# Select users that have at least one published link

	query = """
		SELECT link_blog, blog_url, blog_feed,
				UNIX_TIMESTAMP(blog_feed_checked),
				UNIX_TIMESTAMP(blog_feed_read)
			FROM links, blogs
			WHERE link_status in ('published')
				AND link_date > date_sub(now(), interval %s day)
				AND blog_id = link_blog
				AND blog_type='blog'
				AND (blog_feed_read is null
						OR blog_feed_read < date_sub(now(), interval 1 hour))
			GROUP BY blog_id
			HAVING count(*) < %s
	"""
	cursor.execute(query, (days, days))
	for row in cursor:
		blog = BaseBlogs()
		blog.id, blog.url, blog.feed, blog.checked, blog.read = row
		blog.base_url = blog.url.replace('http://', '').\
							replace('https://', '').replace('www.', '')
		if blog.is_banned():
			continue

		query = """
			SELECT user_login, user_id, user_karma
				FROM users
				WHERE user_url in (%s, %s, %s, %s, %s, %s)
					AND user_karma > %s
					AND user_level not in ('disabled', 'autodisabled')
				ORDER BY user_karma desc limit 1
		"""
		inner_cursor.execute(query,('http://'+blog.base_url,
						 'http://www.'+blog.base_url,
						 'http://'+blog.base_url+'/',
						 'http://www.'+blog.base_url+'/',
						 blog.base_url,
						 'www.'+blog.base_url,
						 min_karma))

		result = inner_cursor.fetchone()
		if result:
			blog.user, blog.user_id, blog.karma = result
			blogs.add(blog)
			blogs_ids.add(blog.id)
			users_ids.add(blog.user_id)

	# Select active users that have no published posts
	query = """
	SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked),
			UNIX_TIMESTAMP(blog_feed_read), user_login, user_id, user_karma
		FROM users, blogs
		WHERE user_karma >= %s
			AND user_url like 'http://%%'
			AND user_level not in ('disabled', 'autodisabled')
			AND user_modification > date_sub(now(), interval %s day)
			AND user_date < date_sub(now(), interval %s day)
			AND blog_url in (
				concat('http://www.',replace(replace(user_url, 'http://', ''), 'www.', '')),
				concat('http://',replace(replace(user_url, 'http://', ''), 'www.', '')),
				concat('http://www.',replace(replace(user_url, 'http://', ''), 'www.', ''), '/'),
				concat('http://',replace(replace(user_url, 'http://', ''), 'www.', ''), '/')
			)
			AND (blog_feed_read is null or blog_feed_read < date_sub(now(), interval 1 hour))
			order by blog_id desc, user_karma desc
	"""
	cursor.execute(query, (dbconf.blogs['active_min_karma'],
						dbconf.blogs['active_min_activity'],
						dbconf.blogs['active_min_age']) )
	for row in cursor:
		blog = BaseBlogs()
		blog.id, blog.url, blog.feed, \
		blog.checked, blog.read, blog.user, blog.user_id, blog.karma = row
		blog.base_url = blog.url.replace('http://', '').\
							replace('https://', '').replace('www.', '')
		if blog.id not in blogs_ids and blog.user_id not in users_ids:
			blogs.add(blog)
			users_ids.add(blog.user_id)
			blogs_ids.add(blog.id)

	feeds_read = 0
	# Sort the set of blogs by date of read
	## TODO: This sort should be changed with rich comparators in BaseBlog
	sorted_blogs = sorted(blogs, key=lambda x: x.read)
	for blog in sorted_blogs:
		if feeds_read >= dbconf.blogs['max_feeds']:
			break
		## TODO: Solve this with a list comprehension
		if not blog.is_banned():
			# Check the number of entries already published in the last day
			query = """
				SELECT count(*)
					FROM rss
					WHERE user_id = %s
						AND date > date_sub(now(), interval 1 day)
			"""
			inner_cursor.execute(query, (blog.user_id,))
			n_entries, = inner_cursor.fetchone()
			# Calculate the number of remaining entries
			blog.max = int(round(blog.karma/dbconf.blogs['karma_divisor'])) \
						- n_entries
			if not blog.max > 0:
				print "Max entries <= 0:", n_entries, blog.karma, blog.url
				continue

			if (not blog.feed and (not blog.checked or
									blog.checked < now - 86400)) \
					or (blog.checked and blog.checked < now - 86400*7):
				blog.get_feed_info()

			if blog.feed and (not blog.read or blog.read < now - 3600):
				results.add(blog)
				print "Added ", blog.id, blog.user, blog.url
				feeds_read += 1
	cursor.close()
	return results
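
BaseBlogs is defined elsewhere in the codebase; the scripts here only rely on the interface below. A minimal sketch inferred from the call sites (not the real implementation):

	class BaseBlogs(object):
		# Attribute bag filled in by the callers above
		id = url = feed = checked = read = None
		user = user_id = karma = base_url = max = None

		def is_banned(self):
			"""True if the blog URL is on the ban list (sketch)."""

		def get_feed_info(self):
			"""Discover and record the blog's feed URL (sketch)."""

		def read_feed(self):
			"""Fetch the feed and return the number of new entries (sketch)."""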
Example #10
def main():
	""" Main loop of top-news """
	cursor = DBM.cursor()
	cursor.execute("select id, name from subs where enabled = 1")
	for row in cursor:
		do_site(row[1])
Example #11
def do_site(site):
	""" Process a given site """
	links = {}
	cursor = DBM.cursor()
	query = """
		select link_id, link_uri,
			unix_timestamp(now()) - unix_timestamp(link_date)
		from links, subs, sub_statuses
		where subs.name = %s
			and subs.id = sub_statuses.id
			and status = 'published'
			and date > date_sub(now(), interval 24 hour)
			and link = link_id
			and link_votes/20 > link_negatives
		order by link_date desc
	"""
	cursor.execute(query, (site,))
	links_total = 0
	for link_id, link_uri, old in cursor:
		links_total += 1
		values = {}
		values['uri'] = link_uri
		# How old in seconds
		values['old'] = old
		values['w'] = 0
		values['c'] = 0
		values['v'] = 0
		values['links_order'] = links_total
		links[link_id] = values

	if not links_total:
		return

	links_format = ','.join(['%s'] * len(links))
	query = """
		select vote_link_id,
			sum((1-(unix_timestamp(now())
					- unix_timestamp(vote_date))/36000)) as x,
			count(*)
		from votes
		where vote_link_id in (%s)
			and vote_type='links'
			and vote_date > date_sub(now(), interval 12 hour)
			and vote_user_id > 0
			and vote_value > 6.1
		group by vote_link_id
		order by x desc
	"""  % links_format
	cursor.execute(query, tuple(links))
	votes_total = 0
	votes_links = 0
	v_total = 0
	v_list = {}
	for link_id, old, votes in cursor:
		votes_links += 1
		votes_old = float(old)
		links[link_id]['v'] = votes_old
		v_total += votes_old
		v_list[link_id] = votes_old
		links[link_id]['votes'] = votes
		votes_total += votes
		links[link_id]['votes_order'] = votes_links

	if not votes_links:
		return

	v_average = v_total/votes_links
	votes_average = votes_total/votes_links

	query = """
		select comment_link_id,
			sum(1.5*(1-(unix_timestamp(now())
						- unix_timestamp(comment_date))/36000)),
			count(*)
		from comments
		where comment_link_id in (%s)
			and comment_date > date_sub(now(), interval 12 hour)
		group by comment_link_id
	""" % links_format
	cursor.execute(query, tuple(links))
	comments_total = 0
	comments_links = 0
	c_total = 0
	c_list = {}
	for link_id, old, count in cursor:
		comment_old = float(old)
		comments_links += 1
		links[link_id]['c'] = comment_old
		c_total += comment_old
		c_list[link_id] = comment_old
		links[link_id]['comments'] = count
		comments_total += count

	if not comments_links:
		return

	c_average = c_total/comments_links
	comments_average = comments_total/comments_links
	query = """
		select id, counter from link_clicks where id in (%s)
	""" % links_format

	cursor.execute(query, tuple(links))
	for link_id, clicks in cursor:
		links[link_id]['clicks'] = clicks

	cursor.close()

	print "Site:", site, "Votes average:", votes_average, v_average, \
			"Comments average:", comments_average, c_average

	for link_id, link_value in links.items():
		if link_value['c'] > 0 \
				and link_value['v'] > 0 \
				and 'clicks' in link_value:
			links[link_id]['w'] = (1 - link_value['old']/(1.5*86400)) \
						   * (link_value['v'] \
						   + link_value['c'] \
						   + link_value['clicks'] \
						   * (1 - link_value['old']/86400) * 0.01)

	sorted_ids = sorted(links, key=lambda x: links[x]['w'], reverse=True)

	if sorted_ids:
		annotations = ','.join([unicode(x) for x in sorted_ids[:10]])
		cursor_update = DBM.cursor('update')
		query = """
			replace into annotations
				(annotation_key, annotation_expire, annotation_text)
				values (%s, date_add(now(), interval 15 minute), %s)
		"""
		cursor_update.execute(query, ('top-actives-'+site, annotations))
		cursor_update.close()
		DBM.commit()

	# Select the top stories
	annotations = ','.join([unicode(x) for x in sorted_ids
						if links[x]['w'] > dbconf.tops['min-weight']
							and (links[x]['links_order'] > 1
							or links[x]['old'] > 3600)
							and links[x]['c'] > c_avrg(c_list, x) * 4
							and links[x]['v'] > c_avrg(v_list, x) * 4
							and links[x]['votes_order'] <= 10 ])

	print "SELECT: ", site, annotations

	if annotations:
		cursor_update = DBM.cursor('update')
		query = """
			replace into annotations
				(annotation_key, annotation_expire, annotation_text)
				values (%s, date_add(now(), interval 10 minute), %s)
		"""
		cursor_update.execute(query, ('top-link-'+site, annotations))
		cursor_update.close()
		DBM.commit()
		print "Stored:", annotations
	else:
		print "No one selected"
Example #12
def main():
    """
	Main loop of the process
	"""
    # timeout in seconds
    timeout = 10
    socket.setdefaulttimeout(timeout)

    print "------------------------------ BEGIN FEEDS UPDATE -", time.strftime(
        "%c"), "UTC ------------------------------"

    # Delete old entries
    update_cursor = DBM.cursor('update')
    query = """
		DELETE FROM rss
			WHERE date_parsed < date_sub(now(), interval %s day)
	"""
    print "Deleting old entries"
    update_cursor.execute(query, (dbconf.blogs['days_to_keep'], ))
    DBM.commit()
    update_cursor.close()
    """
	Get the possible blog we can read
	"""
    now = time.time()
    cursor = DBM.cursor()

    query = """
		SELECT blog_id, blog_url, blog_feed,
				UNIX_TIMESTAMP(blog_feed_checked),
				UNIX_TIMESTAMP(blog_feed_read)
			FROM sub_statuses, links, blogs
			WHERE 
				(id = 1
				AND status = "published" AND date > date_sub(now(), interval %s day)
				AND link_id = link
				AND blog_id = link_blog
				AND blog_feed_checked is not null
				AND blog_type <> 'disabled'
				AND blog_feed is not null)
		UNION
		SELECT blog_id, blog_url, blog_feed,
				UNIX_TIMESTAMP(blog_feed_checked),
				UNIX_TIMESTAMP(blog_feed_read)
			FROM blogs
			WHERE blog_type = 'aggregator'
		GROUP BY blog_id
	"""
    feeds_read = 0
    print "Reading feeds..."
    cursor.execute(query, (dbconf.blogs['days_blogs'], ))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, blog.checked, blog.read = row
        blog.user_id = 0
        blog.base_url = blog.url.replace('http://',
                                         '').replace('https://',
                                                     '').replace('www.', '')
        if blog.is_banned():
            continue
        print " >>> Reading: %s (%s)" % (blog.url, blog.feed)
        entries = blog.read_feed()
        print "     Blog ", blog.id, " has ", entries, " entries %s" % blog.url
        feeds_read += 1

    cursor.close()

    print "------------------------------ END - ", feeds_read, " feeds read - ", time.strftime(
        "%c"), "UTC ------------------------------"
Example #13
def main():
    """
	Main loop of the process
	"""
    # timeout in seconds
    timeout = 10
    socket.setdefaulttimeout(timeout)

    print "------------------------------ BEGIN RSS CHECK -", time.strftime(
        "%c"), " UTC ------------------------------"
    """
	Get the possible blog we can read
	"""
    now = time.time()
    blogs = set()
    results = set()
    cursor = DBM.cursor()

    #query = """
    #	SELECT blog_id, blog_url, blog_feed,
    #			UNIX_TIMESTAMP(blog_feed_checked),
    #			UNIX_TIMESTAMP(blog_feed_read)
    #		FROM sub_statuses, links, blogs
    #		WHERE
    #			(id = 1 AND status = "published" AND date > date_sub(now(), interval %s day)
    #			 AND link_id = link
    #			 AND blog_id = link_blog
    #			 AND blog_type not in ('disabled', 'aggregator')
    #			 AND (blog_feed_checked is null OR blog_feed_checked < date_sub(now(), interval %s day)))
    #	UNION
    #	SELECT blog_id, blog_url, blog_feed,
    #                       UNIX_TIMESTAMP(blog_feed_checked),
    #                        UNIX_TIMESTAMP(blog_feed_read)
    #		FROM blogs
    #		WHERE blog_type = 'aggregator'
    #	GROUP BY blog_id
    #"""

    query = """
		SELECT blog_id, blog_url, blog_feed,
				UNIX_TIMESTAMP(blog_feed_checked),
				UNIX_TIMESTAMP(blog_feed_read)
			FROM sub_statuses, links, blogs
			WHERE 
				(id = 1 AND status = "published" AND date > date_sub(now(), interval %s day)
				 AND link_id = link
				 AND blog_id = link_blog
				 AND blog_type not in ('disabled', 'aggregator')
				 AND (blog_feed_checked is null OR blog_feed_checked < date_sub(now(), interval %s day)))
		GROUP BY blog_id
	"""

    cursor.execute(
        query,
        (dbconf.blogs['days_blogs'], dbconf.blogs['days_blogs_checked']))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, blog.checked, blog.read = row
        blog.user_id = 0
        blog.base_url = blog.url.replace('http://',
                                         '').replace('https://',
                                                     '').replace('www.', '')
        if blog.is_banned():
            continue
        blogs.add(blog)

    cursor.close()

    print("Checking blogs: (%s)" % len(blogs))

    feeds_read = 0
    # Sort the set of blogs by date of read
    sorted_blogs = sorted(blogs, key=lambda x: x.read)
    for blog in sorted_blogs:
        if not blog.is_banned():
            blog.get_feed_info()

            if blog.feed:
                print " > Added ", blog.id, blog.url, blog.feed
                feeds_read += 1

    print "------------------------------ END - Blogs added: ", feeds_read, " - ", time.strftime(
        "%c"), " UTC ------------------------------"
Example #14
def get_candidate_blogs(days, min_karma):
    """
	Get the possible blog we can read
	"""
    now = time.time()
    blogs = set()
    results = set()
    blogs_ids = set()
    users_ids = set()
    cursor = DBM.cursor()
    inner_cursor = DBM.cursor()

    # Select users that have at least one published link

    query = """
		SELECT link_blog, blog_url, blog_feed,
				UNIX_TIMESTAMP(blog_feed_checked),
				UNIX_TIMESTAMP(blog_feed_read)
			FROM sub_statuses, links, blogs
			WHERE 
				id = 1 AND status = "published" AND date > date_sub(now(), interval %s day)
				AND link_id = link
				AND blog_id = link_blog
				AND blog_type in ('blog', 'noiframe')
				AND (blog_feed_read is null
						OR blog_feed_read < date_sub(now(), interval 1 hour))
			GROUP BY blog_id
			HAVING count(*) < %s
	"""
    cursor.execute(query, (days, days / 3))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, blog.checked, blog.read = row
        blog.base_url = blog.url.replace('http://', '').\
             replace('https://', '').replace('www.', '')
        if blog.is_banned():
            continue

        query = """
			SELECT user_login, user_id, user_karma
				FROM users USE INDEX (user_url) 
				WHERE user_url in (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
					AND user_karma > %s
					AND user_level not in ('disabled', 'autodisabled')
				ORDER BY user_karma desc limit 1
		"""
        inner_cursor.execute(
            query,
            ('http://' + blog.base_url, 'http://www.' + blog.base_url,
             'http://' + blog.base_url + '/', 'http://www.' + blog.base_url +
             '/', 'https://' + blog.base_url, 'https://www.' + blog.base_url,
             'https://' + blog.base_url + '/', 'https://www.' + blog.base_url +
             '/', blog.base_url, 'www.' + blog.base_url, min_karma))

        result = inner_cursor.fetchone()
        if result:
            blog.user, blog.user_id, blog.karma = result
            blogs.add(blog)
            blogs_ids.add(blog.id)
            users_ids.add(blog.user_id)

    print("End published blogs (%s)" % len(blogs))

    # Select active users that have no published posts
    query = """
	SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked),
			UNIX_TIMESTAMP(blog_feed_read), user_login, user_id, user_karma
		FROM users, blogs
		WHERE user_karma >= %s
			AND user_url like 'http%%'
			AND user_level not in ('disabled', 'autodisabled')
			AND user_modification > date_sub(now(), interval %s day)
			AND user_date < date_sub(now(), interval %s day)
			AND blog_url in (
				concat('http://www.',replace(replace(user_url, 'http://', ''), 'www.', '')),
				concat('http://',replace(replace(user_url, 'http://', ''), 'www.', '')),
				concat('http://www.',replace(replace(user_url, 'http://', ''), 'www.', ''), '/'),
				concat('http://',replace(replace(user_url, 'http://', ''), 'www.', ''), '/')
			)
			AND (blog_feed_read is null or blog_feed_read < date_sub(now(), interval 1 hour))
			order by blog_id desc, user_karma desc
	"""
    print(
        query %
        (dbconf.blogs['active_min_karma'], dbconf.blogs['active_min_activity'],
         dbconf.blogs['active_min_age']))

    cursor.execute(
        query,
        (dbconf.blogs['active_min_karma'], dbconf.blogs['active_min_activity'],
         dbconf.blogs['active_min_age']))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, \
        blog.checked, blog.read, blog.user, blog.user_id, blog.karma = row
        blog.base_url = blog.url.replace('http://', '').\
             replace('https://', '').replace('www.', '')
        if blog.id not in blogs_ids and blog.user_id not in users_ids:
            blogs.add(blog)
            users_ids.add(blog.user_id)
            blogs_ids.add(blog.id)

    feeds_read = 0
    # Sort the set of blogs by date of read
    ## TODO: This sort should be changed with rich comparators in BaseBlog
    sorted_blogs = sorted(blogs, key=lambda x: x.read)
    for blog in sorted_blogs:
        if feeds_read >= dbconf.blogs['max_feeds']:
            break
        ## TODO: Solve this with a list comprehension
        if not blog.is_banned():
            # Check the number of remaining entries
            query = """
				SELECT count(*)
					FROM rss
					WHERE user_id = %s
						AND date > date_sub(now(), interval 1 day)
				"""
            inner_cursor.execute(query, (blog.user_id, ))
            n_entries, = inner_cursor.fetchone()
            # Calculate the number of remaining entries
            blog.max = int(round(blog.karma/dbconf.blogs['karma_divisor'])) \
               - n_entries
            if not blog.max > 0:
                print "Max entries <= 0:", n_entries, blog.karma, blog.url
                continue

            if (not blog.feed and (not blog.checked or
                  blog.checked < now - 86400)) \
              or (blog.checked and blog.checked < now - 86400*7):
                blog.get_feed_info()

            if blog.feed and (not blog.read or blog.read < now - 3600):
                results.add(blog)
                print "Added ", blog.id, blog.user, blog.url
                feeds_read += 1
    cursor.close()
    return results
Example #15
def main():
    global configuration
    activity = {}
    seen_ips = {}

    # Delete old entries
    update_cursor = DBM.cursor('update')
    query = """ DELETE FROM clones WHERE clon_date < date_sub(now(), interval 120 day) """
    update_cursor.execute(query)
    DBM.commit()

    if configuration.hours:
        minutes = configuration.hours * 60
    elif configuration.minutes:
        minutes = configuration.minutes

    print "Analyzing IPs for %d minutes" % minutes
    cursor = DBM.cursor()

    queries = (
        """select distinct vote_user_id, vote_ip_int from votes where vote_type in ('links', 'comments', 'posts') and vote_user_id != 0 and vote_date > date_sub(now(), interval %s minute)""",
        """select distinct comment_user_id, comment_ip_int from comments where comment_date > date_sub(now(), interval %s minute)"""
    )

    for query in queries:
        cursor.execute(query, (minutes, ))
        for uid, ip_int in cursor:
            ip = IPAddress(ip_int)
            add_user_ip(uid, ip, activity)
            #print uid, ip_int, ip

    search_from = int(30 * 24 + (minutes * 60))
    print "Analyzing history for %d hours" % search_from

    clones = set()
    ips_counter = {}
    for u, ips in activity.iteritems():
        # To avoid warning of truncated DOUBLE, the list of decimals is passed directly to the mysql driver
        format_strings = ','.join(['%s'] * len(ips))
        query = """select distinct vote_user_id, vote_ip_int from votes where vote_ip_int in (%s) """ % format_strings
        query += """and vote_user_id != %d and vote_user_id > 0 and vote_date > date_sub(now(), interval %d hour)""" % (
            u, search_from)
        cursor.execute(query, tuple(ips))

        for clon, ip_int in cursor:
            ip = IPAddress(ip_int)
            # print u, clon, ip
            clones.add((u, clon, ip))
            subnet = IPSubnet(ip)
            if subnet not in ips_counter:
                ips_counter[subnet] = 1
            else:
                ips_counter[subnet] += 1

    #print clones, ips_counter

    c = 0
    for u, clon, ip in clones:
        subnet = IPSubnet(ip)
        if ips_counter[subnet] < 30:
            print "Clon:", u, clon, ip, ips_counter[subnet]
            insert = """REPLACE INTO clones (clon_from, clon_to, clon_ip) VALUES (%s, %s, %s)"""
            update_cursor.execute(insert, (u, clon, ip))
            insert = """INSERT IGNORE INTO clones (clon_to, clon_from, clon_ip) VALUES (%s, %s,	%s)"""
            update_cursor.execute(insert, (u, clon, ip))
            c += 1
            if c % 10 == 0:
                DBM.commit()
        else:
            print "Rejected: ", str(ip), subnet, ips_counter[subnet]
    DBM.commit()
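
add_user_ip is not shown here; since activity is later iterated as user -> collection of IPs, and the IPs are passed as integer query parameters, a minimal sketch would be:

    def add_user_ip(uid, ip, activity):
        """Record an IP seen for a user (sketch; the real helper lives elsewhere)."""
        if uid not in activity:
            activity[uid] = set()
        activity[uid].add(int(ip))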