import itertools
import random
import sqlite3

import common

# Schedule votes on randomly chosen answers for a slice of the users.

# Number of users to schedule for, counted from the second user onwards.
nusers = 1
total_votes_per_user = common.schedule_ndays() * common.max_votes_per_day

input_connection, input_cursor, output_connection, output_cursor = (
    common.io_connections_and_cursors())
vote_id = common.next_output_id(output_cursor)
nanswers = next(input_cursor.execute(
    'SELECT COUNT(*) FROM posts WHERE PostTypeId = ?',
    (common.Posts_PostTypeId_Answer,)))[0]

# Never ask for more distinct answers than exist; otherwise the sampling
# below could not be satisfied.
nvotes_per_user = min(total_votes_per_user, nanswers)

# Second puppet only.
for user_row in itertools.islice(common.iterate_users(), 1, 1 + nusers):
    user_id = user_row[common.users_csv_user_id_col]
    # Distinct random row offsets into the answers, drawn without replacement.
    answer_indexes = random.sample(range(nanswers), nvotes_per_user)
    for answer_i in answer_indexes:
        # Fetch the answer sitting at this offset among all answers.
        post_row = next(input_cursor.execute(
            'SELECT * FROM posts WHERE PostTypeId = ? LIMIT 1 OFFSET ?',
            (common.Posts_PostTypeId_Answer, answer_i)))
        output_cursor.execute(
            'INSERT INTO votes VALUES (?, ?, ?, ?, NULL, NULL)',
            (vote_id, user_id, post_row['Id'], post_row['ParentId']))
        # Every inserted vote row gets its own id.
        vote_id += 1
    if len(answer_indexes) != total_votes_per_user:
        print('Warning: not enough posts.')

# Persist the scheduled votes and release both database connections.
common.commit_and_closeclose_io_connections(input_connection, output_connection)
# Schedule votes on answers to questions tagged <git>: every selected user
# gets one vote row per selected answer.

# First user to schedule for, inclusive.
first_user_range = 1
# Last user to schedule for, exclusive.
after_last_user_range = 2
total_votes_per_user = common.schedule_ndays() * common.max_votes_per_day

input_connection, input_cursor, output_connection, output_cursor = (
    common.io_connections_and_cursors())
vote_id = common.next_output_id(output_cursor)

# Materialize the rows up front: a bare cursor is exhausted after the first
# pass, which would leave every user after the first one without votes.
post_rows = input_cursor.execute("""
    SELECT answers.*
    FROM posts AS answers
    INNER JOIN posts AS questions
    WHERE questions.PostTypeId = ?
    AND answers.PostTypeId = ?
    AND answers.ParentId = questions.Id
    AND answers.CommunityOwnedDate IS NULL
    AND questions.Tags LIKE '%<git>%'
    ORDER BY Id ASC
    LIMIT ?;
    """,
    (common.Posts_PostTypeId_Question,
     common.Posts_PostTypeId_Answer,
     total_votes_per_user)).fetchall()

for user_row in itertools.islice(common.iterate_users(),
                                 first_user_range, after_last_user_range):
    user_id = user_row[common.users_csv_user_id_col]
    for post_row in post_rows:
        output_cursor.execute(
            'INSERT INTO votes VALUES (?, ?, ?, ?, NULL, NULL)',
            (vote_id, user_id, post_row['Id'], post_row['ParentId']))
        # Every inserted vote row gets its own id.
        vote_id += 1
    # The query is capped at total_votes_per_user rows, so fewer rows means
    # the input did not hold enough matching answers.
    if len(post_rows) != total_votes_per_user:
        print('Warning: not enough posts.')

# Persist the scheduled votes and release both database connections.
common.commit_and_closeclose_io_connections(input_connection, output_connection)
# Insert one vote row per (selected user, selected answer) pair, where the
# answers belong to questions tagged <git>.
input_connection, input_cursor, output_connection, output_cursor = (
    common.io_connections_and_cursors())
vote_id = common.next_output_id(output_cursor)

# fetchall() turns the one-shot cursor into a list so that the rows can be
# replayed for each user instead of being consumed by the first one.
post_rows = input_cursor.execute(
    """
    SELECT answers.*
    FROM posts AS answers
    INNER JOIN posts AS questions
    WHERE questions.PostTypeId = ?
    AND answers.PostTypeId = ?
    AND answers.ParentId = questions.Id
    AND answers.CommunityOwnedDate IS NULL
    AND questions.Tags LIKE '%<git>%'
    ORDER BY Id ASC
    LIMIT ?;
    """,
    (common.Posts_PostTypeId_Question,
     common.Posts_PostTypeId_Answer,
     total_votes_per_user)).fetchall()
enough_posts = len(post_rows) == total_votes_per_user

selected_users = itertools.islice(
    common.iterate_users(), first_user_range, after_last_user_range)
for user_row in selected_users:
    user_id = user_row[common.users_csv_user_id_col]
    for post_row in post_rows:
        output_cursor.execute(
            'INSERT INTO votes VALUES (?, ?, ?, ?, NULL, NULL)',
            (vote_id, user_id, post_row['Id'], post_row['ParentId']))
        # Keep vote ids unique across all inserted rows.
        vote_id += 1
    # Fewer rows than requested means the input ran out of matching answers.
    if not enough_posts:
        print('Warning: not enough posts.')

# Persist the scheduled votes and release both database connections.
common.commit_and_closeclose_io_connections(input_connection, output_connection)
connection.close() # Is thread safe: http://stackoverflow.com/questions/2973900/is-pythons-logging-module-thread-safe logging.basicConfig( filename = os.path.splitext(os.path.realpath(__file__))[0] + '.log', level = logging.DEBUG, format = '%(asctime)s | %(levelname)-7s | %(threadName)7s | %(message)s', ) logging.Formatter.converter = time.gmtime script_dir = os.path.dirname(os.path.realpath(__file__)) logging.debug( 'Last git commit SHA = ' + subprocess.check_output(['git', '-C', script_dir, 'rev-parse', 'HEAD'])) torrc_dir = os.path.join(script_dir, 'torrc') if len(sys.argv) > 1: casperjs_path = sys.argv[1] else: casperjs_path = '/home/ciro/.nvm/v0.10.26/bin/casperjs' with open(common.users_csv_path, 'r') as user_file: user_csv = csv.reader(user_file) threads = [] for i, user_row in enumerate(common.iterate_users()): t = UserVotesThread(user_row, i) threads.append(t) t.start() for t in threads: t.join()
# Script entry: configure logging, then run one UserVotesThread per user and
# wait for all of them.

# Is thread safe: http://stackoverflow.com/questions/2973900/is-pythons-logging-module-thread-safe
# The log file sits next to this script, with the extension replaced by .log.
logging.basicConfig(
    filename=os.path.splitext(os.path.realpath(__file__))[0] + '.log',
    level=logging.DEBUG,
    format='%(asctime)s | %(levelname)-7s | %(threadName)7s | %(message)s',
)
# Format log timestamps with time.gmtime.
logging.Formatter.converter = time.gmtime

script_dir = os.path.dirname(os.path.realpath(__file__))
# Record which commit of the script is running. universal_newlines=True makes
# check_output return text rather than bytes, so the value can be formatted
# into the message on both Python 2 and 3; strip() drops git's trailing
# newline so the log entry stays on one line.
git_sha = subprocess.check_output(
    ['git', '-C', script_dir, 'rev-parse', 'HEAD'],
    universal_newlines=True).strip()
logging.debug('Last git commit SHA = %s', git_sha)
torrc_dir = os.path.join(script_dir, 'torrc')

# The casperjs executable may be given as the first command line argument;
# otherwise a hard-coded path is used.
if len(sys.argv) > 1:
    casperjs_path = sys.argv[1]
else:
    casperjs_path = '/home/ciro/.nvm/v0.10.26/bin/casperjs'

# Start one thread per user, passing the user's row and its index.
threads = []
for i, user_row in enumerate(common.iterate_users()):
    t = UserVotesThread(user_row, i)
    threads.append(t)
    t.start()
# Block until every thread has finished.
for t in threads:
    t.join()