def getprediction(model, titles, times, subreddits, texts):
    """Predict scores for a batch of comments.

    Args:
        model: trained model exposing a ``predict`` method that accepts
            ``[contexts, titles, texts]``.
        titles: submission titles, one per comment.
        times: UTC epoch timestamps (seconds), one per comment.
        subreddits: integer-encoded subreddits, one per comment.
        texts: comment bodies, one per comment.

    Returns:
        Flat list of predictions with decimals removed (via ``rd`` helpers).
    """
    contexts = []
    for time, subreddit in zip(times, subreddits):
        # Convert the timestamp once instead of four times per comment.
        dt = datetime.utcfromtimestamp(time)
        contexts.append([
            dt.minute, dt.hour, dt.day, dt.month,
            rd.convertutctoweekdayint(time), subreddit
        ])

    return rd.removedecimals(
        rd.flatten(model.predict([contexts, titles, texts])))
# Example #2
def getprediction(model, titles, times, subreddits, texts, collection):
    """Predict scores for a batch of comments, additionally feeding the model
    the five highest-voted recent comments from the database as reference
    inputs.

    Args:
        model: trained model exposing a ``predict`` method over a tf.data
            pipeline of 8 zipped inputs.
        titles: submission titles, one per comment.
        times: UTC epoch timestamps (seconds), one per comment.
        subreddits: integer-encoded subreddits, one per comment.
        texts: comment bodies, one per comment.
        collection: MongoDB collection of comment documents with
            ``timepostedutc``, ``score`` and ``text`` fields.

    Returns:
        Flat list of predictions with decimals removed (via ``rd`` helpers).
    """
    contexts = []
    for time, subreddit in zip(times, subreddits):
        # Convert the timestamp once instead of four times per comment.
        dt = datetime.utcfromtimestamp(time)
        contexts.append([
            dt.minute, dt.hour, dt.day, dt.month,
            rd.convertutctoweekdayint(time), subreddit
        ])

    # Newest comment anchors the lookback window for "top comments".
    newest_comment = collection.find().sort([('timepostedutc', -1)]).limit(1)
    # NOTE(review): days=1 + weeks=1 looks back 8 days, not one week — kept
    # as-is to preserve behavior; confirm the intended window. Also note
    # replace() does not zero microseconds, though floor() absorbs them.
    one_week_before_last_comment = math.floor(
        (datetime.utcfromtimestamp(newest_comment[0]['timepostedutc']) -
         timedelta(days=1, weeks=1)).replace(hour=0, minute=0,
                                             second=0).timestamp())
    highest_voted_comments_for_week = collection.find({
        'timepostedutc': {
            '$gte': one_week_before_last_comment
        }
    }).sort([('score', -1)]).limit(5)

    # One reference dataset per top comment, each broadcast across the batch
    # (replaces five copy-pasted dataset_comment_reference_N assignments).
    reference_datasets = tuple(
        tf.data.Dataset.from_tensors(
            [highest_voted_comments_for_week[i]['text']] * len(titles))
        for i in range(5))

    # Build data pipeline
    dataset_context = tf.data.Dataset.from_tensors(contexts)
    dataset_title = tf.data.Dataset.from_tensors(titles)
    dataset_text = tf.data.Dataset.from_tensors(texts)
    dataset_inputs = tf.data.Dataset.zip(
        (dataset_context, dataset_title, dataset_text) + reference_datasets)
    dataset = tf.data.Dataset.zip((dataset_inputs, ))

    return rd.removedecimals(rd.flatten(model.predict(dataset)))
# mongodb setup: connection string may be overridden by the first CLI argument.
connection_string = "mongodb://localhost:27017" if len(
    sys.argv) == 1 else sys.argv[1]
client = MongoClient(connection_string)
db = client["reddit-comment-vote-predictor"]
collection = db.comments

model = rm.getmodelandweights()

# Materialize a sample of comments in one pass (replaces the manual
# `comments += [comment]` accumulation loop).
comments = list(collection.find().limit(200))

# Column-wise feature extraction for the model inputs.
titles = [c['submission_title'] for c in comments]
times = [c['timepostedutc'] for c in comments]
subreddits = [rd.convertsubreddittoint(c['subreddit']) for c in comments]
texts = [c['text'] for c in comments]

predictions = rm.getprediction(model, titles, times, subreddits, texts,
                               collection)

predictions.sort()
# NOTE(review): rm.getprediction already applies rd.removedecimals to its
# result; this second application looks redundant — confirm before removing.
predictions = rd.removedecimals(predictions)

print(predictions)

print("Max prediction: " + str(max(predictions)))
print("Min prediction: " + str(min(predictions)))