import math
from datetime import datetime, timedelta, timezone

import tensorflow as tf

import rd  # project-local helpers (flatten, removedecimals, subreddit/weekday encoding)


# Earlier variant: predicts from context, title, and text only.
def getprediction(model, titles, times, subreddits, texts):
    # One context vector per comment: minute, hour, day, month, weekday,
    # and the integer-encoded subreddit.
    contexts = [[
        datetime.utcfromtimestamp(time).minute,
        datetime.utcfromtimestamp(time).hour,
        datetime.utcfromtimestamp(time).day,
        datetime.utcfromtimestamp(time).month,
        rd.convertutctoweekdayint(time),
        subreddit
    ] for time, subreddit in zip(times, subreddits)]
    return rd.removedecimals(
        rd.flatten(model.predict([contexts, titles, texts])))
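# Hedged illustration of the context layout built above (the timestamp and
# subreddit id are made up; the weekday value depends on whatever convention
# rd.convertutctoweekdayint uses):
#
#   >>> t = 1577836800  # 2020-01-01 00:00:00 UTC
#   >>> [datetime.utcfromtimestamp(t).minute,
#   ...  datetime.utcfromtimestamp(t).hour,
#   ...  datetime.utcfromtimestamp(t).day,
#   ...  datetime.utcfromtimestamp(t).month,
#   ...  rd.convertutctoweekdayint(t),
#   ...  3]  # subreddit already int-encoded
#   [0, 0, 1, 1, <weekday int for a Wednesday>, 3]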
# Current variant: additionally feeds the five highest-voted comments from the
# week before the newest stored comment as reference inputs.
def getprediction(model, titles, times, subreddits, texts, collection):
    contexts = [[
        datetime.utcfromtimestamp(time).minute,
        datetime.utcfromtimestamp(time).hour,
        datetime.utcfromtimestamp(time).day,
        datetime.utcfromtimestamp(time).month,
        rd.convertutctoweekdayint(time),
        subreddit
    ] for time, subreddit in zip(times, subreddits)]

    # Anchor the reference window to the newest comment in the database:
    # midnight (UTC) eight days before that comment was posted.
    newest_comment = collection.find_one(sort=[('timepostedutc', -1)])
    one_week_before_last_comment = math.floor(
        (datetime.utcfromtimestamp(newest_comment['timepostedutc']) -
         timedelta(days=1, weeks=1))
        .replace(hour=0, minute=0, second=0, tzinfo=timezone.utc)
        .timestamp())

    # The five top-scoring comments in that window; assumes at least five exist.
    highest_voted_comments_for_week = list(
        collection.find({
            'timepostedutc': {'$gte': one_week_before_last_comment}
        }).sort([('score', -1)]).limit(5))

    # Each reference comment is repeated once per input row, so every
    # prediction sees the same five reference texts.
    reference_datasets = tuple(
        tf.data.Dataset.from_tensors([comment['text']] * len(titles))
        for comment in highest_voted_comments_for_week)

    # Build the data pipeline: one single-element dataset per model input.
    dataset_context = tf.data.Dataset.from_tensors(contexts)
    dataset_title = tf.data.Dataset.from_tensors(titles)
    dataset_text = tf.data.Dataset.from_tensors(texts)
    dataset_inputs = tf.data.Dataset.zip(
        (dataset_context, dataset_title, dataset_text) + reference_datasets)
    # Wrapping in a 1-tuple marks the element as inputs only (no labels).
    dataset = tf.data.Dataset.zip((dataset_inputs,))

    return rd.removedecimals(rd.flatten(model.predict(dataset)))
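# A minimal, runnable sketch (not part of the original module) of the tf.data
# pattern used above: zip one single-element dataset per model input, then wrap
# the zipped tuple in a 1-tuple so Keras predict() treats the element as inputs
# with no labels. The toy two-input model and its sizes are assumptions made
# for illustration; the real model comes from rm.getmodelandweights().
def _zip_pipeline_demo():
    x1 = tf.keras.Input(shape=(6,), name="context")
    x2 = tf.keras.Input(shape=(3,), name="extra")
    out = tf.keras.layers.Dense(1)(tf.keras.layers.Concatenate()([x1, x2]))
    toy_model = tf.keras.Model([x1, x2], out)

    # from_tensors() yields a single element that is the entire batch (2 rows here).
    d1 = tf.data.Dataset.from_tensors(tf.zeros([2, 6]))
    d2 = tf.data.Dataset.from_tensors(tf.zeros([2, 3]))
    inputs = tf.data.Dataset.zip((d1, d2))
    dataset = tf.data.Dataset.zip((inputs,))  # 1-tuple => inputs only
    return toy_model.predict(dataset)  # shape (2, 1)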
# Main script: rm/rd are the project-local model and data helper modules.
import sys

from pymongo import MongoClient

import rd
import rm

# MongoDB setup; an optional CLI argument overrides the default connection string.
connection_string = "mongodb://localhost:27017" if len(
    sys.argv) == 1 else sys.argv[1]
client = MongoClient(connection_string)
db = client["reddit-comment-vote-predictor"]
collection = db.comments

model = rm.getmodelandweights()

# Pull a sample of stored comments and split out the model's input columns.
comments = list(collection.find().limit(200))
titles = [c['submission_title'] for c in comments]
times = [c['timepostedutc'] for c in comments]
subreddits = [rd.convertsubreddittoint(c['subreddit']) for c in comments]
texts = [c['text'] for c in comments]

predictions = rm.getprediction(model, titles, times, subreddits, texts,
                               collection)
predictions.sort()
predictions = rd.removedecimals(predictions)
print(predictions)
print("Max prediction: " + str(max(predictions)))
print("Min prediction: " + str(min(predictions)))
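# Usage sketch (the script filename is an assumption; substitute the real one):
#   python predict.py                            # default mongodb://localhost:27017
#   python predict.py mongodb://otherhost:27017  # custom connection string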