def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM rss WHERE date < date_sub(now(), interval %s day) """ update_cursor.execute(query, (dbconf.blogs['days_to_keep'],)) DBM.commit() update_cursor.close() users = set() news = set() blogs = get_candidate_blogs(dbconf.blogs['days_published'], dbconf.blogs['min_karma']) for blog in blogs: entries = blog.read_feed() time.sleep(3) if entries > 0: users.add(blog.user) news.add(blog) if dbconf.blogs['post_user'] and dbconf.blogs['post_key'] and users: post = _('Nuevo apunte en el blog de: ') for note in news: post += "@" + note.user for link in note.links: post += " " + link post += "\n" post += '\nhttp://'+dbconf.domain+dbconf.blogs['viewer']+" #blogs" print post try: url = """ http://{d}{newpost}?user={post_user}&key={post_key}&text={t} """.format(d= dbconf.domain, t= urllib.quote_plus(post), **dbconf.blogs) ## TODO: Use timeout parameter instead of ## socket.setdefaulttimeout(timeout) urlpost = urllib2.urlopen(url) print urlpost.read(100) urlpost.close() except KeyError: print "Error posting", url pass
def store(site, entry):
    link_id = entry['id']
    cursor = DBM.cursor()
    cursor.execute("select link_id from links where link_id = %s", (link_id,))
    result = cursor.fetchone()
    if not result:
        return False
    annotation = read_annotation(KEY + str(link_id))
    if not annotation:
        data = {}
    else:
        data = json.loads(annotation)
    # Guard against a KeyError when this site has no stored entry yet
    if site in data:
        if data[site]['ts'] >= entry['ts']:
            return False
        del data[site]
    data[site] = entry
    data = json.dumps(data)
    if data:
        store_annotation(KEY + str(link_id), data)
        print data
    cursor.close()
    return entry['ts']
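# store() relies on read_annotation()/store_annotation(), which are not
# defined in this file. Minimal sketches of what they are assumed to do,
# given the annotations table used elsewhere in these scripts (the exact
# schema and expiry handling are assumptions):
def read_annotation(key):
    """Return the stored text for key, or None if it does not exist."""
    cursor = DBM.cursor()
    cursor.execute("select annotation_text from annotations "
                   "where annotation_key = %s", (key,))
    row = cursor.fetchone()
    cursor.close()
    return row[0] if row else None

def store_annotation(key, text):
    """Insert or overwrite the text stored under key."""
    cursor = DBM.cursor('update')
    cursor.execute("replace into annotations (annotation_key, annotation_text) "
                   "values (%s, %s)", (key, text))
    cursor.close()
    DBM.commit()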
def main():
    cursor = DBM.cursor()
    query = """select distinct clon.user_login, clon.user_login_register,
                    users.user_login, users.user_login_register,
                    clon.user_level, clon_ip, clon_date
                from users, users as clon, clones
                where clon_from = users.user_id
                    and clon_to = clon.user_id
                    and clon_date > date_sub(now(), interval 60 day)"""
    cursor.execute(query)
    print("%-16s (%-20s)\t%-16s (%-20s)\t%-20s\t%-12s\t%s" %
          ("clon", "clonreg", "user", "userreg", "ip", "level", "date"))
    print("-" * 141)
    for clon, clonreg, user, userreg, level, ip, date in cursor:
        print("%-16s (%-20s)\t%-16s (%-20s)\t%-20s\t%-12s\t%s" %
              (clon, clonreg, user, userreg, ip, level, date))
def main():
    global configuration
    user = configuration.user
    cursor = DBM.cursor()
    query = """select distinct clon.user_login, clon.user_level,
                    clon_ip, clon_date
                from users, users as clon, clones
                where users.user_login = %s
                    and clon_from = users.user_id
                    and clon_to = clon.user_id
                    and clon_date > date_sub(now(), interval 60 day)"""
    cursor.execute(query, (user,))
    for clon, level, ip, date in cursor:
        print("%-16s\t%s\t%s\t%s" % (clon, ip, level, date))
def main():
    global configuration
    ip = configuration.IP
    cursor = DBM.cursor()
    query = """ SELECT distinct user_login, user_email, user_level, clon_ip
                FROM users, clones
                WHERE (clon_ip LIKE %s OR clon_ip LIKE %s)
                    AND (clon_from = user_id OR clon_to = user_id)"""
    # Match both plain addresses and cookie-tracked entries, which are
    # stored with a "COOK:" prefix
    cursor.execute(query, ("%s%%" % ip, "COOK:%s%%" % ip))
    for user, email, level, clon_ip in cursor:
        print("%-16s\t%s\t%s\t%s" % (user, email, clon_ip, level))
def main():
    global configuration
    user = configuration.user
    cursor = DBM.cursor()
    seen = set()
    query = """select vote_ip_int, vote_date
                from users, votes
                where user_login = %s
                    and vote_type in ('links', 'comments', 'posts')
                    and vote_user_id = user_id
                order by vote_date desc"""
    cursor.execute(query, (user,))
    c = 0
    for ip_int, date in cursor:
        if ip_int not in seen and ip_int > 0:
            print("%s\t%s" % (ipaddr.IPAddress(long(ip_int)), date))
            seen.add(ip_int)
            c += 1
            if c > 20:
                break
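# Votes store the source address as an integer (vote_ip_int); the loop
# above converts it back to dotted notation with Google's ipaddr library,
# the Python 2 predecessor of the stdlib ipaddress module. A hypothetical
# helper showing the same conversion on its own:
def format_ip(ip_int):
    """Render an integer-encoded address, e.g. 3232235777 -> '192.168.1.1'."""
    return str(ipaddr.IPAddress(long(ip_int)))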
def get_link_average(link_id):
    """ Get the average weight of a link """
    votes = {}
    values_sum = 0
    values_count = 0
    cursor = DBM.cursor()
    query = """ select vote_user_id, vote_value
                from votes, links
                where vote_type = 'links'
                    and vote_link_id = %s
                    and vote_user_id > 0
                    and vote_value > 0
                    and link_id = vote_link_id
                    and ( (link_status = 'published' and vote_date < link_date)
                        OR link_status != 'published') """
    cursor.execute(query, (link_id,))
    for user_id, vote_value in cursor:
        votes[user_id] = int(vote_value / abs(vote_value))
    # Every pair of distinct voters, smaller user id first
    sorted_users = [(minor, major)
                    for (minor, major) in itertools.product(votes, repeat=2)
                    if major > minor]
    for values_count, (minor, major) in enumerate(sorted_users, start=1):
        query = """ select value, UNIX_TIMESTAMP(date)
                    from users_similarities
                    where minor = %s and major = %s """
        cursor.execute(query, (minor, major))
        row = cursor.fetchone()
        values_sum += 0 if row is None else row[0]
    print values_sum, values_count
    # Fewer than two voters leave no pairs; avoid dividing by zero
    if values_count == 0:
        return 0
    average = values_sum / values_count
    return average
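# The average above is taken over voter pairs, not over votes: with voters
# {3, 7, 11} the pairs looked up in users_similarities are (3, 7), (3, 11)
# and (7, 11), in some order. A minimal illustration of the same pairing:
#
#     >>> votes = {3: 1, 7: 1, 11: 1}
#     >>> sorted((a, b) for (a, b) in itertools.product(votes, repeat=2) if b > a)
#     [(3, 7), (3, 11), (7, 11)]
#
# (itertools.combinations(sorted(votes), 2) would produce the same pairs.)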
def main(): """ Main loop, processing the top 20 published links""" if len(sys.argv) == 2: link_id = int(sys.argv[1]) print get_link_average(link_id) else: total = 0 average = 0 cursor = DBM.cursor() query = """ select link_id from links where link_status = 'published' order by link_date desc limit 20" """ cursor.execute(query) for total, link_id in enumerate(cursor, start=1): average += get_link_average(link_id) assert total > 0, "No published links." print average / total
def get_candidate_blogs(days, min_karma):
    """ Get the possible blogs we can read """
    now = time.time()
    blogs = set()
    results = set()
    blogs_ids = set()
    users_ids = set()
    cursor = DBM.cursor()
    inner_cursor = DBM.cursor()

    # Select users that have at least one published link
    query = """ SELECT link_blog, blog_url, blog_feed,
                    UNIX_TIMESTAMP(blog_feed_checked),
                    UNIX_TIMESTAMP(blog_feed_read)
                FROM links, blogs
                WHERE link_status in ('published')
                    AND link_date > date_sub(now(), interval %s day)
                    AND blog_id = link_blog
                    AND blog_type = 'blog'
                    AND (blog_feed_read is null
                        OR blog_feed_read < date_sub(now(), interval 1 hour))
                GROUP BY blog_id
                HAVING count(*) < %s """
    cursor.execute(query, (days, days))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, blog.checked, blog.read = row
        blog.base_url = blog.url.replace('http://', '').\
            replace('https://', '').replace('www.', '')
        if blog.is_banned():
            continue
        query = """ SELECT user_login, user_id, user_karma
                    FROM users
                    WHERE user_url in (%s, %s, %s, %s, %s, %s)
                        AND user_karma > %s
                        AND user_level not in ('disabled', 'autodisabled')
                    ORDER BY user_karma desc limit 1 """
        inner_cursor.execute(query, ('http://' + blog.base_url,
                                     'http://www.' + blog.base_url,
                                     'http://' + blog.base_url + '/',
                                     'http://www.' + blog.base_url + '/',
                                     blog.base_url,
                                     'www.' + blog.base_url,
                                     min_karma))
        result = inner_cursor.fetchone()
        if result:
            blog.user, blog.user_id, blog.karma = result
            blogs.add(blog)
            blogs_ids.add(blog.id)
            users_ids.add(blog.user_id)

    # Select active users that have no published posts
    query = """ SELECT blog_id, blog_url, blog_feed,
                    UNIX_TIMESTAMP(blog_feed_checked),
                    UNIX_TIMESTAMP(blog_feed_read),
                    user_login, user_id, user_karma
                FROM users, blogs
                WHERE user_karma >= %s
                    AND user_url like 'http://%%'
                    AND user_level not in ('disabled', 'autodisabled')
                    AND user_modification > date_sub(now(), interval %s day)
                    AND user_date < date_sub(now(), interval %s day)
                    AND blog_url in (
                        concat('http://www.', replace(replace(user_url, 'http://', ''), 'www.', '')),
                        concat('http://', replace(replace(user_url, 'http://', ''), 'www.', '')),
                        concat('http://www.', replace(replace(user_url, 'http://', ''), 'www.', ''), '/'),
                        concat('http://', replace(replace(user_url, 'http://', ''), 'www.', ''), '/')
                    )
                    AND (blog_feed_read is null
                        or blog_feed_read < date_sub(now(), interval 1 hour))
                order by blog_id desc, user_karma desc """
    cursor.execute(query, (dbconf.blogs['active_min_karma'],
                           dbconf.blogs['active_min_activity'],
                           dbconf.blogs['active_min_age']))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, \
            blog.checked, blog.read, blog.user, blog.user_id, blog.karma = row
        blog.base_url = blog.url.replace('http://', '').\
            replace('https://', '').replace('www.', '')
        if blog.id not in blogs_ids and blog.user_id not in users_ids:
            blogs.add(blog)
            users_ids.add(blog.user_id)
            blogs_ids.add(blog.id)

    feeds_read = 0
    # Sort the set of blogs by date of read
    ## TODO: This sort should be changed with rich comparators in BaseBlog
    sorted_blogs = sorted(blogs, key=lambda x: x.read)
    for blog in sorted_blogs:
        if feeds_read >= dbconf.blogs['max_feeds']:
            break
        ## TODO: Solve this with a list comprehension
        if not blog.is_banned():
            # Count the entries already stored in the last day
            query = """ SELECT count(*) FROM rss
                        WHERE user_id = %s
                            AND date > date_sub(now(), interval 1 day) """
            inner_cursor.execute(query, (blog.user_id,))
            n_entries, = inner_cursor.fetchone()
            # Calculate the number of remaining entries
            blog.max = int(round(blog.karma / dbconf.blogs['karma_divisor'])) \
                - n_entries
            if not blog.max > 0:
                print "Max entries <= 0:", n_entries, blog.karma, blog.url
                continue
            if (not blog.feed
                    and (not blog.checked or blog.checked < now - 86400)) \
                    or (blog.checked and blog.checked < now - 86400 * 7):
                blog.get_feed_info()
            if blog.feed and (not blog.read or blog.read < now - 3600):
                results.add(blog)
                print "Added ", blog.id, blog.user, blog.url
                feeds_read += 1
    cursor.close()
    return results
def main(): """ Main loop of top-news """ cursor = DBM.cursor() cursor.execute("select id, name from subs where enabled = 1") for row in cursor: do_site(row[1])
def do_site(site): """ Process a given site """ links = {} cursor = DBM.cursor() query = """ select link_id, link_uri, unix_timestamp(now()) - unix_timestamp(link_date) from links, subs, sub_statuses where subs.name = %s and subs.id = sub_statuses.id and status = 'published' and date > date_sub(now(), interval 24 hour) and link = link_id and link_votes/20 > link_negatives order by link_date desc """ cursor.execute(query, (site,)) links_total = 0 for link_id, link_uri, old in cursor: links_total += 1 values = {} values['uri'] = link_uri # How old in seconds values['old'] = old values['w'] = 0 values['c'] = 0 values['v'] = 0 values['links_order'] = links_total links[link_id] = values if not links_total: return links_format = ','.join(['%s'] * len(links)) query = """ select vote_link_id, sum((1-(unix_timestamp(now()) - unix_timestamp(vote_date))/36000)) as x, count(*) from votes where vote_link_id in (%s) and vote_type='links' and vote_date > date_sub(now(), interval 12 hour) and vote_user_id > 0 and vote_value > 6.1 group by vote_link_id order by x desc """ % links_format cursor.execute(query, tuple(links)) votes_total = 0 votes_links = 0 v_total = 0 v_list = {} for link_id, old, votes in cursor: votes_links += 1 votes_old = float(old) links[link_id]['v'] = votes_old v_total += votes_old v_list[link_id] = votes_old links[link_id]['votes'] = votes votes_total += votes links[link_id]['votes_order'] = votes_links if not votes_links: return v_average = v_total/votes_links votes_average = votes_total/votes_links query = """ select comment_link_id, sum(1.5*(1-(unix_timestamp(now()) - unix_timestamp(comment_date))/36000)), count(*) from comments where comment_link_id in (%s) and comment_date > date_sub(now(), interval 12 hour) group by comment_link_id """ % links_format cursor.execute(query, tuple(links)) comments_total = 0 comments_links = 0 c_total = 0 c_list = {} for link_id, old, count in cursor: comment_old = float(old) comments_links += 1 links[link_id]['c'] = comment_old c_total += comment_old c_list[link_id] = comment_old links[link_id]['comments'] = count comments_total += count if not comments_links: return c_average = c_total/comments_links comments_average = comments_total/comments_links query = """ select id, counter from link_clicks where id in (%s) """ % links_format cursor.execute(query, tuple(links)) for link_id, clicks in cursor: links[link_id]['clicks'] = clicks cursor.close() print "Site:", site, "Votes average:", votes_average, v_average, \ "Comments average:", comments_average, c_average for link_id, link_value in links.items(): if link_value['c'] > 0 \ and link_value['v'] > 0 \ and 'clicks' in link_value: links[link_id]['w'] = (1 - link_value['old']/(1.5*86400)) \ * (link_value['v'] \ + link_value['c'] \ + link_value['clicks'] \ * (1 - link_value['old']/86400) * 0.01) sorted_ids = sorted(links, cmp=lambda x, y: cmp(links[y]['w'], links[x]['w'])) if sorted_ids: annotations = ','.join([unicode(x) for x in sorted_ids[:10]]) cursor_update = DBM.cursor('update') query = """ replace into annotations (annotation_key, annotation_expire, annotation_text) values (%s, date_add(now(), interval 15 minute), %s) """ cursor_update.execute(query, ('top-actives-'+site, annotations)) cursor_update.close() DBM.commit() i = 0 for key in sorted_ids: if links[key]['w'] > 0 and i < 10: i += 1 # Select the top stories annotations = ','.join([unicode(x) for x in sorted_ids if links[x]['w'] > dbconf.tops['min-weight'] and (links[x]['links_order'] > 1 or links[x]['old'] > 3600) and links[x]['c'] > 
c_avrg(c_list, x) * 4 and links[x]['v'] > c_avrg(v_list, x) * 4 and links[x]['votes_order'] <= 10 ]) print "SELECT: ", site, annotations if annotations: cursor_update = DBM.cursor('update') query = """ replace into annotations (annotation_key, annotation_expire, annotation_text) values (%s, date_add(now(), interval 10 minute), %s) """ cursor_update.execute(query, ('top-link-'+site, annotations)) cursor_update.close() DBM.commit() print "Stored:", annotations else: print "No one selected"
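# do_site() calls c_avrg(), which is not defined in this file. From its use
# above it is assumed to be a leave-one-out average: the mean of the scores
# of all *other* links, so a link is compared against its peers rather than
# against a mean it dominates itself. A minimal sketch under that assumption:
def c_avrg(values, exclude_id):
    """Average of values, leaving out the entry for exclude_id."""
    rest = [v for k, v in values.items() if k != exclude_id]
    if not rest:
        return 0
    return sum(rest) / float(len(rest))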
def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) print "------------------------------ BEGIN FEEDS UPDATE -", time.strftime( "%c"), "UTC ------------------------------" # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM rss WHERE date_parsed < date_sub(now(), interval %s day) """ print "Deleting old entries" update_cursor.execute(query, (dbconf.blogs['days_to_keep'], )) DBM.commit() update_cursor.close() """ Get the possible blog we can read """ now = time.time() cursor = DBM.cursor() query = """ SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM sub_statuses, links, blogs WHERE (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) AND link_id = link AND blog_id = link_blog AND blog_feed_checked is not null AND blog_type <> 'disabled' AND blog_feed is not null) UNION SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM blogs WHERE blog_type = 'aggregator' GROUP BY blog_id """ feeds_read = 0 print "Reading feeds..." cursor.execute(query, (dbconf.blogs['days_blogs'], )) for row in cursor: blog = BaseBlogs() blog.id, blog.url, blog.feed, blog.checked, blog.read = row blog.user_id = 0 blog.base_url = blog.url.replace('http://', '').replace('https://', '').replace('www.', '') if blog.is_banned(): continue print " >>> Reading: %s (%s)" % (blog.url, blog.feed) entries = blog.read_feed() print " Blog ", blog.id, " has ", entries, " entries %s" % blog.url feeds_read += 1 cursor.close() print "------------------------------ END - ", feeds_read, " feeds read - ", time.strftime( "%c"), "UTC ------------------------------"
def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) print "------------------------------ BEGIN RSS CHECK -", time.strftime( "%c"), " UTC ------------------------------" """ Get the possible blog we can read """ now = time.time() blogs = set() results = set() cursor = DBM.cursor() #query = """ # SELECT blog_id, blog_url, blog_feed, # UNIX_TIMESTAMP(blog_feed_checked), # UNIX_TIMESTAMP(blog_feed_read) # FROM sub_statuses, links, blogs # WHERE # (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) # AND link_id = link # AND blog_id = link_blog # AND blog_type not in ('disabled', 'aggregator') # AND (blog_feed_checked is null OR blog_feed_checked < date_sub(now(), interval %s day))) # UNION # SELECT blog_id, blog_url, blog_feed, # UNIX_TIMESTAMP(blog_feed_checked), # UNIX_TIMESTAMP(blog_feed_read) # FROM blogs # WHERE blog_type = 'aggregator' # GROUP BY blog_id #""" query = """ SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM sub_statuses, links, blogs WHERE (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) AND link_id = link AND blog_id = link_blog AND blog_type not in ('disabled', 'aggregator') AND (blog_feed_checked is null OR blog_feed_checked < date_sub(now(), interval %s day))) GROUP BY blog_id """ cursor.execute( query, (dbconf.blogs['days_blogs'], dbconf.blogs['days_blogs_checked'])) for row in cursor: blog = BaseBlogs() blog.id, blog.url, blog.feed, blog.checked, blog.read = row blog.user_id = 0 blog.base_url = blog.url.replace('http://', '').replace('https://', '').replace('www.', '') if blog.is_banned(): continue blogs.add(blog) cursor.close() print("Checking blogs: (%s)" % len(blogs)) feeds_read = 0 # Sort the set of blogs by date of read sorted_blogs = sorted(blogs, key=lambda x: x.read) for blog in sorted_blogs: if not blog.is_banned(): blog.get_feed_info() if blog.feed: print " > Added ", blog.id, blog.url, blog.feed feeds_read += 1 print "------------------------------ END - Blogs added: ", feeds_read, " - ", time.strftime( "%c"), " UTC ------------------------------"
def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) print "------------------------------ BEGIN RSS CHECK -", time.strftime("%c"), " UTC ------------------------------" """ Get the possible blog we can read """ now = time.time() blogs = set() results = set() cursor = DBM.cursor() #query = """ # SELECT blog_id, blog_url, blog_feed, # UNIX_TIMESTAMP(blog_feed_checked), # UNIX_TIMESTAMP(blog_feed_read) # FROM sub_statuses, links, blogs # WHERE # (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) # AND link_id = link # AND blog_id = link_blog # AND blog_type not in ('disabled', 'aggregator') # AND (blog_feed_checked is null OR blog_feed_checked < date_sub(now(), interval %s day))) # UNION # SELECT blog_id, blog_url, blog_feed, # UNIX_TIMESTAMP(blog_feed_checked), # UNIX_TIMESTAMP(blog_feed_read) # FROM blogs # WHERE blog_type = 'aggregator' # GROUP BY blog_id #""" query = """ SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM sub_statuses, links, blogs WHERE (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) AND link_id = link AND blog_id = link_blog AND blog_type not in ('disabled', 'aggregator') AND (blog_feed_checked is null OR blog_feed_checked < date_sub(now(), interval %s day))) GROUP BY blog_id """ cursor.execute(query, (dbconf.blogs['days_blogs'], dbconf.blogs['days_blogs_checked'])) for row in cursor: blog = BaseBlogs() blog.id, blog.url, blog.feed, blog.checked, blog.read = row blog.user_id = 0 blog.base_url = blog.url.replace('http://', '').replace('https://', '').replace('www.', '') if blog.is_banned(): continue blogs.add(blog) cursor.close() print("Checking blogs: (%s)" % len(blogs)) feeds_read = 0 # Sort the set of blogs by date of read sorted_blogs = sorted(blogs, key=lambda x: x.read) for blog in sorted_blogs: if not blog.is_banned(): blog.get_feed_info() if blog.feed: print " > Added ", blog.id, blog.url, blog.feed feeds_read += 1 print "------------------------------ END - Blogs added: ", feeds_read, " - ", time.strftime("%c"), " UTC ------------------------------"
def get_candidate_blogs(days, min_karma):
    """ Get the possible blogs we can read """
    now = time.time()
    blogs = set()
    results = set()
    blogs_ids = set()
    users_ids = set()
    cursor = DBM.cursor()
    inner_cursor = DBM.cursor()

    # Select users that have at least one published link
    query = """ SELECT link_blog, blog_url, blog_feed,
                    UNIX_TIMESTAMP(blog_feed_checked),
                    UNIX_TIMESTAMP(blog_feed_read)
                FROM sub_statuses, links, blogs
                WHERE id = 1 AND status = "published"
                    AND date > date_sub(now(), interval %s day)
                    AND link_id = link
                    AND blog_id = link_blog
                    AND blog_type in ('blog', 'noiframe')
                    AND (blog_feed_read is null
                        OR blog_feed_read < date_sub(now(), interval 1 hour))
                GROUP BY blog_id
                HAVING count(*) < %s """
    cursor.execute(query, (days, days / 3))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, blog.checked, blog.read = row
        blog.base_url = blog.url.replace('http://', '').\
            replace('https://', '').replace('www.', '')
        if blog.is_banned():
            continue
        query = """ SELECT user_login, user_id, user_karma
                    FROM users USE INDEX (user_url)
                    WHERE user_url in (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        AND user_karma > %s
                        AND user_level not in ('disabled', 'autodisabled')
                    ORDER BY user_karma desc limit 1 """
        inner_cursor.execute(query, ('http://' + blog.base_url,
                                     'http://www.' + blog.base_url,
                                     'http://' + blog.base_url + '/',
                                     'http://www.' + blog.base_url + '/',
                                     'https://' + blog.base_url,
                                     'https://www.' + blog.base_url,
                                     'https://' + blog.base_url + '/',
                                     'https://www.' + blog.base_url + '/',
                                     blog.base_url,
                                     'www.' + blog.base_url,
                                     min_karma))
        result = inner_cursor.fetchone()
        if result:
            blog.user, blog.user_id, blog.karma = result
            blogs.add(blog)
            blogs_ids.add(blog.id)
            users_ids.add(blog.user_id)
    print("End published blogs (%s)" % len(blogs))

    # Select active users that have no published posts
    query = """ SELECT blog_id, blog_url, blog_feed,
                    UNIX_TIMESTAMP(blog_feed_checked),
                    UNIX_TIMESTAMP(blog_feed_read),
                    user_login, user_id, user_karma
                FROM users, blogs
                WHERE user_karma >= %s
                    AND user_url like 'http%%'
                    AND user_level not in ('disabled', 'autodisabled')
                    AND user_modification > date_sub(now(), interval %s day)
                    AND user_date < date_sub(now(), interval %s day)
                    AND blog_url in (
                        concat('http://www.', replace(replace(user_url, 'http://', ''), 'www.', '')),
                        concat('http://', replace(replace(user_url, 'http://', ''), 'www.', '')),
                        concat('http://www.', replace(replace(user_url, 'http://', ''), 'www.', ''), '/'),
                        concat('http://', replace(replace(user_url, 'http://', ''), 'www.', ''), '/')
                    )
                    AND (blog_feed_read is null
                        or blog_feed_read < date_sub(now(), interval 1 hour))
                order by blog_id desc, user_karma desc """
    # Debug: show the query with its parameters substituted
    print(query % (dbconf.blogs['active_min_karma'],
                   dbconf.blogs['active_min_activity'],
                   dbconf.blogs['active_min_age']))
    cursor.execute(query, (dbconf.blogs['active_min_karma'],
                           dbconf.blogs['active_min_activity'],
                           dbconf.blogs['active_min_age']))
    for row in cursor:
        blog = BaseBlogs()
        blog.id, blog.url, blog.feed, \
            blog.checked, blog.read, blog.user, blog.user_id, blog.karma = row
        blog.base_url = blog.url.replace('http://', '').\
            replace('https://', '').replace('www.', '')
        if blog.id not in blogs_ids and blog.user_id not in users_ids:
            blogs.add(blog)
            users_ids.add(blog.user_id)
            blogs_ids.add(blog.id)

    feeds_read = 0
    # Sort the set of blogs by date of read
    ## TODO: This sort should be changed with rich comparators in BaseBlog
    sorted_blogs = sorted(blogs, key=lambda x: x.read)
    for blog in sorted_blogs:
        if feeds_read >= dbconf.blogs['max_feeds']:
            break
        ## TODO: Solve this with a list comprehension
        if not blog.is_banned():
            # Count the entries already stored in the last day
            query = """ SELECT count(*) FROM rss
                        WHERE user_id = %s
                            AND date > date_sub(now(), interval 1 day) """
            inner_cursor.execute(query, (blog.user_id,))
            n_entries, = inner_cursor.fetchone()
            # Calculate the number of remaining entries
            blog.max = int(round(blog.karma / dbconf.blogs['karma_divisor'])) \
                - n_entries
            if not blog.max > 0:
                print "Max entries <= 0:", n_entries, blog.karma, blog.url
                continue
            if (not blog.feed
                    and (not blog.checked or blog.checked < now - 86400)) \
                    or (blog.checked and blog.checked < now - 86400 * 7):
                blog.get_feed_info()
            if blog.feed and (not blog.read or blog.read < now - 3600):
                results.add(blog)
                print "Added ", blog.id, blog.user, blog.url
                feeds_read += 1
    cursor.close()
    return results
def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) print "------------------------------ BEGIN FEEDS UPDATE -", time.strftime("%c"), "UTC ------------------------------" # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM rss WHERE date_parsed < date_sub(now(), interval %s day) """ print "Deleting old entries" update_cursor.execute(query, (dbconf.blogs['days_to_keep'],)) DBM.commit() update_cursor.close() """ Get the possible blog we can read """ now = time.time() cursor = DBM.cursor() query = """ SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM sub_statuses, links, blogs WHERE (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) AND link_id = link AND blog_id = link_blog AND blog_feed_checked is not null AND blog_type <> 'disabled' AND blog_feed is not null) UNION SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM blogs WHERE blog_type = 'aggregator' GROUP BY blog_id """ feeds_read = 0 print "Reading feeds..." cursor.execute(query, (dbconf.blogs['days_blogs'],)) for row in cursor: blog = BaseBlogs() blog.id, blog.url, blog.feed, blog.checked, blog.read = row blog.user_id = 0 blog.base_url = blog.url.replace('http://', '').replace('https://', '').replace('www.', '') if blog.is_banned(): continue print " >>> Reading: %s (%s)" % (blog.url, blog.feed) entries = blog.read_feed() print " Blog ", blog.id, " has ", entries, " entries %s" % blog.url feeds_read += 1 cursor.close() print "------------------------------ END - ", feeds_read, " feeds read - ", time.strftime("%c"), "UTC ------------------------------"
def main():
    global configuration
    activity = {}

    # Delete old entries
    update_cursor = DBM.cursor('update')
    query = """ DELETE FROM clones
                WHERE clon_date < date_sub(now(), interval 120 day) """
    update_cursor.execute(query)
    DBM.commit()

    # Fall back to the minutes option when hours is not given
    if configuration.hours:
        minutes = configuration.hours * 60
    else:
        minutes = configuration.minutes
    print "Analyzing IPs for %d minutes" % minutes

    cursor = DBM.cursor()
    queries = (
        """select distinct vote_user_id, vote_ip_int from votes
            where vote_type in ('links', 'comments', 'posts')
                and vote_user_id != 0
                and vote_date > date_sub(now(), interval %s minute)""",
        """select distinct comment_user_id, comment_ip_int from comments
            where comment_date > date_sub(now(), interval %s minute)"""
    )
    for query in queries:
        cursor.execute(query, (minutes,))
        for uid, ip_int in cursor:
            ip = IPAddress(ip_int)
            add_user_ip(uid, ip, activity)
            #print uid, ip_int, ip

    search_from = int(30 * 24 + (minutes * 60))
    print "Analyzing history for %d hours" % search_from

    clones = set()
    ips_counter = {}
    for u, ips in activity.iteritems():
        # To avoid the warning of truncated DOUBLE, the list of decimals
        # is passed directly to the mysql driver
        format_strings = ','.join(['%s'] * len(ips))
        query = """select distinct vote_user_id, vote_ip_int from votes
                    where vote_ip_int in (%s) """ % format_strings
        query += """and vote_user_id != %d and vote_user_id > 0
                    and vote_date > date_sub(now(), interval %d hour)""" % (
            u, search_from)
        cursor.execute(query, tuple(ips))
        for clon, ip_int in cursor:
            ip = IPAddress(ip_int)
            #print u, clon, ip
            clones.add((u, clon, ip))
            subnet = IPSubnet(ip)
            if subnet not in ips_counter:
                ips_counter[subnet] = 1
            else:
                ips_counter[subnet] += 1
    #print clones, ips_counter

    c = 0
    for u, clon, ip in clones:
        subnet = IPSubnet(ip)
        # Skip very busy subnets, they are probably shared networks
        if ips_counter[subnet] < 30:
            print "Clon:", u, clon, ip, ips_counter[subnet]
            insert = """REPLACE INTO clones (clon_from, clon_to, clon_ip)
                        VALUES (%s, %s, %s)"""
            update_cursor.execute(insert, (u, clon, ip))
            insert = """INSERT IGNORE INTO clones (clon_to, clon_from, clon_ip)
                        VALUES (%s, %s, %s)"""
            update_cursor.execute(insert, (u, clon, ip))
            c += 1
            if c % 10 == 0:
                DBM.commit()
        else:
            print "Rejected: ", str(ip), subnet, ips_counter[subnet]
    DBM.commit()
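# main() depends on add_user_ip() and IPSubnet(), which are defined
# elsewhere. From their use above, add_user_ip() is assumed to collect the
# set of addresses each user was active from, and IPSubnet() to reduce an
# address to its network prefix so busy shared networks can be discounted.
# Minimal sketches under those assumptions (the integer form and the /24
# prefix are guesses):
def add_user_ip(uid, ip, activity):
    """Record that user uid was seen acting from ip, kept as an integer
    so it can be matched against the vote_ip_int column."""
    activity.setdefault(uid, set()).add(long(ip))

def IPSubnet(ip):
    """Collapse an IPv4 address to its /24 network, e.g. '10.1.2.0'."""
    octets = str(ip).split('.')
    return '.'.join(octets[:3] + ['0'])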