def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM rss WHERE date < date_sub(now(), interval %s day) """ update_cursor.execute(query, (dbconf.blogs['days_to_keep'],)) DBM.commit() update_cursor.close() users = set() news = set() blogs = get_candidate_blogs(dbconf.blogs['days_published'], dbconf.blogs['min_karma']) for blog in blogs: entries = blog.read_feed() time.sleep(3) if entries > 0: users.add(blog.user) news.add(blog) if dbconf.blogs['post_user'] and dbconf.blogs['post_key'] and users: post = _('Nuevo apunte en el blog de: ') for note in news: post += "@" + note.user for link in note.links: post += " " + link post += "\n" post += '\nhttp://'+dbconf.domain+dbconf.blogs['viewer']+" #blogs" print post try: url = """ http://{d}{newpost}?user={post_user}&key={post_key}&text={t} """.format(d= dbconf.domain, t= urllib.quote_plus(post), **dbconf.blogs) ## TODO: Use timeout parameter instead of ## socket.setdefaulttimeout(timeout) urlpost = urllib2.urlopen(url) print urlpost.read(100) urlpost.close() except KeyError: print "Error posting", url pass
def do_site(site):
    """ Process a given site """
    # Collects the links published on *site* in the last 24 hours, scores
    # them with time-decayed vote/comment activity plus click counters,
    # and stores two ranked id lists in the annotations table:
    # 'top-actives-<site>' (ten highest weights) and 'top-link-<site>'
    # (links beating several activity thresholds).
    links = {}
    cursor = DBM.cursor()
    # Published links of the last 24h with a healthy vote/negative ratio
    query = """ select link_id, link_uri, unix_timestamp(now()) - unix_timestamp(link_date) from links, subs, sub_statuses where subs.name = %s and subs.id = sub_statuses.id and status = 'published' and date > date_sub(now(), interval 24 hour) and link = link_id and link_votes/20 > link_negatives order by link_date desc """
    cursor.execute(query, (site,))
    links_total = 0
    for link_id, link_uri, old in cursor:
        links_total += 1
        values = {}
        values['uri'] = link_uri
        # How old in seconds
        values['old'] = old
        values['w'] = 0  # final combined weight, filled in below
        values['c'] = 0  # decayed comment score
        values['v'] = 0  # decayed vote score
        values['links_order'] = links_total  # 1 = most recently published
        links[link_id] = values
    if not links_total:
        return
    # Placeholder list reused by every IN (...) clause below
    links_format = ','.join(['%s'] * len(links))
    # Time-decayed positive-vote score per link (last 12 hours only)
    query = """ select vote_link_id, sum((1-(unix_timestamp(now()) - unix_timestamp(vote_date))/36000)) as x, count(*) from votes where vote_link_id in (%s) and vote_type='links' and vote_date > date_sub(now(), interval 12 hour) and vote_user_id > 0 and vote_value > 6.1 group by vote_link_id order by x desc """ % links_format
    cursor.execute(query, tuple(links))
    votes_total = 0
    votes_links = 0
    v_total = 0
    v_list = {}
    for link_id, old, votes in cursor:
        votes_links += 1
        votes_old = float(old)
        links[link_id]['v'] = votes_old
        v_total += votes_old
        v_list[link_id] = votes_old
        links[link_id]['votes'] = votes
        votes_total += votes
        links[link_id]['votes_order'] = votes_links  # rank by vote score
    if not votes_links:
        return
    v_average = v_total/votes_links
    votes_average = votes_total/votes_links
    # Time-decayed comment score per link (last 12 hours)
    query = """ select comment_link_id, sum(1.5*(1-(unix_timestamp(now()) - unix_timestamp(comment_date))/36000)), count(*) from comments where comment_link_id in (%s) and comment_date > date_sub(now(), interval 12 hour) group by comment_link_id """ % links_format
    cursor.execute(query, tuple(links))
    comments_total = 0
    comments_links = 0
    c_total = 0
    c_list = {}
    for link_id, old, count in cursor:
        comment_old = float(old)
        comments_links += 1
        links[link_id]['c'] = comment_old
        c_total += comment_old
        c_list[link_id] = comment_old
        links[link_id]['comments'] = count
        comments_total += count
    if not comments_links:
        return
    c_average = c_total/comments_links
    comments_average = comments_total/comments_links
    # Raw click counters per link
    query = """ select id, counter from link_clicks where id in (%s) """ % links_format
    cursor.execute(query, tuple(links))
    for link_id, clicks in cursor:
        links[link_id]['clicks'] = clicks
    cursor.close()
    print "Site:", site, "Votes average:", votes_average, v_average, \
        "Comments average:", comments_average, c_average
    # Combine votes, comments and (lightly weighted) clicks into one
    # age-decayed weight; links missing any component keep w == 0.
    for link_id, link_value in links.items():
        if link_value['c'] > 0 \
                and link_value['v'] > 0 \
                and 'clicks' in link_value:
            links[link_id]['w'] = (1 - link_value['old']/(1.5*86400)) \
                * (link_value['v'] \
                + link_value['c'] \
                + link_value['clicks'] \
                * (1 - link_value['old']/86400) * 0.01)
    # Ids sorted by descending weight
    sorted_ids = sorted(links, cmp=lambda x, y: cmp(links[y]['w'], links[x]['w']))
    if sorted_ids:
        # Store the ten most active links, expiring in 15 minutes
        annotations = ','.join([unicode(x) for x in sorted_ids[:10]])
        cursor_update = DBM.cursor('update')
        query = """ replace into annotations (annotation_key, annotation_expire, annotation_text) values (%s, date_add(now(), interval 15 minute), %s) """
        cursor_update.execute(query, ('top-actives-'+site, annotations))
        cursor_update.close()
        DBM.commit()
    # NOTE(review): this loop only increments a local counter that is
    # never read afterwards — it looks like dead code left from an
    # earlier version.
    i = 0
    for key in sorted_ids:
        if links[key]['w'] > 0 and i < 10:
            i += 1
    # Select the top stories
    # NOTE(review): the vote threshold calls c_avrg(v_list, x) —
    # presumably c_avrg is a generic averaging helper; confirm it was
    # not meant to be a separate v_avrg function.
    annotations = ','.join([unicode(x) for x in sorted_ids
                            if links[x]['w'] > dbconf.tops['min-weight']
                            and (links[x]['links_order'] > 1 or links[x]['old'] > 3600)
                            and links[x]['c'] > c_avrg(c_list, x) * 4
                            and links[x]['v'] > c_avrg(v_list, x) * 4
                            and links[x]['votes_order'] <= 10])
    print "SELECT: ", site, annotations
    if annotations:
        # Store the selection, expiring in 10 minutes
        cursor_update = DBM.cursor('update')
        query = """ replace into annotations (annotation_key, annotation_expire, annotation_text) values (%s, date_add(now(), interval 10 minute), %s) """
        cursor_update.execute(query, ('top-link-'+site, annotations))
        cursor_update.close()
        DBM.commit()
        print "Stored:", annotations
    else:
        print "No one selected"
def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) print "------------------------------ BEGIN FEEDS UPDATE -", time.strftime( "%c"), "UTC ------------------------------" # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM rss WHERE date_parsed < date_sub(now(), interval %s day) """ print "Deleting old entries" update_cursor.execute(query, (dbconf.blogs['days_to_keep'], )) DBM.commit() update_cursor.close() """ Get the possible blog we can read """ now = time.time() cursor = DBM.cursor() query = """ SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM sub_statuses, links, blogs WHERE (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) AND link_id = link AND blog_id = link_blog AND blog_feed_checked is not null AND blog_type <> 'disabled' AND blog_feed is not null) UNION SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM blogs WHERE blog_type = 'aggregator' GROUP BY blog_id """ feeds_read = 0 print "Reading feeds..." cursor.execute(query, (dbconf.blogs['days_blogs'], )) for row in cursor: blog = BaseBlogs() blog.id, blog.url, blog.feed, blog.checked, blog.read = row blog.user_id = 0 blog.base_url = blog.url.replace('http://', '').replace('https://', '').replace('www.', '') if blog.is_banned(): continue print " >>> Reading: %s (%s)" % (blog.url, blog.feed) entries = blog.read_feed() print " Blog ", blog.id, " has ", entries, " entries %s" % blog.url feeds_read += 1 cursor.close() print "------------------------------ END - ", feeds_read, " feeds read - ", time.strftime( "%c"), "UTC ------------------------------"
def main(): """ Main loop of the process """ # timeout in seconds timeout = 10 socket.setdefaulttimeout(timeout) print "------------------------------ BEGIN FEEDS UPDATE -", time.strftime("%c"), "UTC ------------------------------" # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM rss WHERE date_parsed < date_sub(now(), interval %s day) """ print "Deleting old entries" update_cursor.execute(query, (dbconf.blogs['days_to_keep'],)) DBM.commit() update_cursor.close() """ Get the possible blog we can read """ now = time.time() cursor = DBM.cursor() query = """ SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM sub_statuses, links, blogs WHERE (id = 1 AND status = "published" AND date > date_sub(now(), interval %s day) AND link_id = link AND blog_id = link_blog AND blog_feed_checked is not null AND blog_type <> 'disabled' AND blog_feed is not null) UNION SELECT blog_id, blog_url, blog_feed, UNIX_TIMESTAMP(blog_feed_checked), UNIX_TIMESTAMP(blog_feed_read) FROM blogs WHERE blog_type = 'aggregator' GROUP BY blog_id """ feeds_read = 0 print "Reading feeds..." cursor.execute(query, (dbconf.blogs['days_blogs'],)) for row in cursor: blog = BaseBlogs() blog.id, blog.url, blog.feed, blog.checked, blog.read = row blog.user_id = 0 blog.base_url = blog.url.replace('http://', '').replace('https://', '').replace('www.', '') if blog.is_banned(): continue print " >>> Reading: %s (%s)" % (blog.url, blog.feed) entries = blog.read_feed() print " Blog ", blog.id, " has ", entries, " entries %s" % blog.url feeds_read += 1 cursor.close() print "------------------------------ END - ", feeds_read, " feeds read - ", time.strftime("%c"), "UTC ------------------------------"
def main(): global configurations activity = {} seen_ips = {} # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM clones WHERE clon_date < date_sub(now(), interval 120 day) """ update_cursor.execute(query) DBM.commit() if configuration.hours: minutes = configuration.hours * 60 elif configuration.minutes: minutes = configuration.minutes print "Analyzing IPs for %d minutes" % minutes cursor = DBM.cursor() queries = ( """select distinct vote_user_id, vote_ip_int from votes where vote_type in ('links', 'comments', 'posts') and vote_user_id != 0 and vote_date > date_sub(now(), interval %s minute)""", """select distinct comment_user_id, comment_ip_int from comments where comment_date > date_sub(now(), interval %s minute)""" ) for query in queries: cursor.execute(query, (minutes,)) for uid, ip_int in cursor: ip = IPAddress(ip_int) add_user_ip(uid, ip, activity) #print uid, ip_int, ip search_from = int(30*24 + (minutes*60)); print "Analyzing history for %d hours" % search_from clones = set() ips_counter = {} for u, ips in activity.iteritems(): # To avoid warning of truncated DOUBLE, the list of decimals is passed directly to the mysql driver format_strings = ','.join(['%s'] * len(ips)) query = """select distinct vote_user_id, vote_ip_int from votes where vote_ip_int in (%s) """ % format_strings query += """and vote_user_id != %d and vote_user_id > 0 and vote_date > date_sub(now(), interval %d hour)""" % (u, search_from) cursor.execute(query, tuple(ips)) for clon, ip_int in cursor: ip = IPAddress(ip_int) # print u, clon, ip clones.add((u, clon, ip)) subnet = IPSubnet(ip) if subnet not in ips_counter: ips_counter[subnet] = 1 else: ips_counter[subnet] += 1 #print clones, ips_counter c = 0 for u, clon, ip in clones: subnet = IPSubnet(ip) if ips_counter[subnet] < 30: print "Clon:", u, clon, ip, ips_counter[subnet] insert = """REPLACE INTO clones (clon_from, clon_to, clon_ip) VALUES (%s, %s, %s)""" update_cursor.execute(insert, (u, clon, ip)) insert 
= """INSERT IGNORE INTO clones (clon_to, clon_from, clon_ip) VALUES (%s, %s, %s)""" update_cursor.execute(insert, (u, clon, ip)) c += 1 if c % 10 == 0: DBM.commit() else: print "Rejected: ", str(ip), subnet, ips_counter[subnet] DBM.commit()
def main(): global configurations activity = {} seen_ips = {} # Delete old entries update_cursor = DBM.cursor('update') query = """ DELETE FROM clones WHERE clon_date < date_sub(now(), interval 120 day) """ update_cursor.execute(query) DBM.commit() if configuration.hours: minutes = configuration.hours * 60 elif configuration.minutes: minutes = configuration.minutes print "Analyzing IPs for %d minutes" % minutes cursor = DBM.cursor() queries = ( """select distinct vote_user_id, vote_ip_int from votes where vote_type in ('links', 'comments', 'posts') and vote_user_id != 0 and vote_date > date_sub(now(), interval %s minute)""", """select distinct comment_user_id, comment_ip_int from comments where comment_date > date_sub(now(), interval %s minute)""" ) for query in queries: cursor.execute(query, (minutes, )) for uid, ip_int in cursor: ip = IPAddress(ip_int) add_user_ip(uid, ip, activity) #print uid, ip_int, ip search_from = int(30 * 24 + (minutes * 60)) print "Analyzing history for %d hours" % search_from clones = set() ips_counter = {} for u, ips in activity.iteritems(): # To avoid warning of truncated DOUBLE, the list of decimals is passed directly to the mysql driver format_strings = ','.join(['%s'] * len(ips)) query = """select distinct vote_user_id, vote_ip_int from votes where vote_ip_int in (%s) """ % format_strings query += """and vote_user_id != %d and vote_user_id > 0 and vote_date > date_sub(now(), interval %d hour)""" % ( u, search_from) cursor.execute(query, tuple(ips)) for clon, ip_int in cursor: ip = IPAddress(ip_int) # print u, clon, ip clones.add((u, clon, ip)) subnet = IPSubnet(ip) if subnet not in ips_counter: ips_counter[subnet] = 1 else: ips_counter[subnet] += 1 #print clones, ips_counter c = 0 for u, clon, ip in clones: subnet = IPSubnet(ip) if ips_counter[subnet] < 30: print "Clon:", u, clon, ip, ips_counter[subnet] insert = """REPLACE INTO clones (clon_from, clon_to, clon_ip) VALUES (%s, %s, %s)""" update_cursor.execute(insert, (u, clon, ip)) 
insert = """INSERT IGNORE INTO clones (clon_to, clon_from, clon_ip) VALUES (%s, %s, %s)""" update_cursor.execute(insert, (u, clon, ip)) c += 1 if c % 10 == 0: DBM.commit() else: print "Rejected: ", str(ip), subnet, ips_counter[subnet] DBM.commit()