Esempio n. 1
0
from utility.NbConfig import NbConfig
from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('prune')


if __name__ == "__main__":
    # Smoke test: round-trip an empty NbConfig through the database —
    # write it, read it back, and log both sides for comparison.
    client = client_factory.get_db_client()
    client.ensure_config_table_exists()
    default_config = NbConfig({})
    logger.info("Config to write: %s", default_config.config)
    client.write_config(default_config.config)
    stored = client.read_config()
    logger.info("Config read from DB: %s", stored.config)
Esempio n. 2
0
import warnings

import MySQLdb
from datadog import statsd
from ddtrace import patch_all
from ddtrace import tracer

from connectors.DbConnector import DbConnector
from utility import nb_logging
from utility.NbConfig import NbConfig

import rollbar

# Module-level logger; ddtrace patch_all() auto-instruments supported
# libraries (including MySQLdb) for APM tracing at import time.
logger = nb_logging.setup_logger('MySqlClient')
patch_all()

# Prefix for statsd metric names emitted by this module.
STATSD_PREFIX = 'nb.MySqlClient.'


class MySqlClient(DbConnector):
    def __init__(self, host, user, password, db_name):
        DbConnector.__init__(self)
        self.host = host
        self.user = user
        self.password = password
        self.db_name = db_name

	logger.info("Attempt to connect to DB %s on %s as user %s", db_name, host, user)

        self.conn = MySQLdb.connect(host=self.host,
                                    user=self.user,
Esempio n. 3
0
import json

import requests
import requests.exceptions
import rollbar
from bs4 import BeautifulSoup
from datadog import statsd
from time import sleep

from utility import nb_logging

logger = nb_logging.setup_logger('NewsblurConnector')


class NewsblurConnector:

    def __init__(self, config, username, password):
        """Store connection settings and credentials; no network I/O here."""
        self.config = config
        # Pull the endpoint and TLS-verification flag out of the config mapping.
        self.nb_endpoint = config.get('NB_ENDPOINT')
        self.verify = config.get('VERIFY')
        self.credentials = {'username': username, 'password': password}
        self.cookies = None  # populated by login()

    @statsd.timed('nb.NewsblurConnector.login')
    def login(self):
        """ log in and save cookies """
        resp = requests.post(self.nb_endpoint + '/api/login', self.credentials)
        statsd.increment('nb.http_requests.post')
        logger.debug('NewsBlur login response code: %s', resp.status_code)
        self.cookies = resp.cookies
Esempio n. 4
0
from datadog import statsd

import time
from utility import client_factory
from utility import nb_logging

from connectors.NewsblurConnector import NewsblurConnector

logger = nb_logging.setup_logger('populate')
# NOTE(review): populate() below assigns a *local* `config`, so this
# module-level value stays None and appears unused — candidate for removal.
config = None


@statsd.timed('nb.populate.populate')
def populate():
    logger.info('Set up DB and add a row for each HN story')

    db_client = client_factory.get_db_client()
    db_client.ensure_stories_table_exists()
    config = db_client.read_config()
    db_client.close_connection()

    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
    hashlist = nb_client.get_nb_hash_list()

    logger.info('Size of hashlist is %s', len(hashlist))

    batch_size = int(config.get('BATCH_SIZE'))
    logger.debug('Batch size is %s', batch_size)

    i = 0
Esempio n. 5
0
from datadog import statsd

from models.NbUrl import NbUrl
from utility import nb_logging, client_factory

logger = nb_logging.setup_logger('add_domains')


@statsd.timed('nb.add_domains.add_domains')
def add_domains():
    """Derive and persist domain info for every URL stored in the DB."""
    client = client_factory.get_db_client()
    client.ensure_domains_table_exists()

    for entry in client.list_urls():
        story_hash = entry[0]
        url = NbUrl(entry[1])
        domain, toplevel, toplevel_new = url.get_domain_info()
        client.insert_domain_entry(story_hash, url.url, domain, toplevel, toplevel_new)


if __name__ == "__main__":
    # Allow running this task directly as a script.
    add_domains()
Esempio n. 6
0
from datadog import statsd

from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('update_comment_counts')


# update comment counts for stories that might have had comments added:
# last updated is older than threshold
# comments are still open on story
@statsd.timed('nb.populate.update_comment_counts')
def update_comment_counts():
    """Refresh comment counts for stories the DB flags as update candidates."""
    logger.info('Update comment counts to stories in DB')
    db_client = client_factory.get_db_client()

    candidates = db_client.list_comment_count_update_candidates()
    logger.debug('Found %s candidates for updating comment count', len(candidates))

    nb_client = client_factory.get_newsblur_client()
    nb_client.login()

    for candidate in candidates:
        story_url = candidate[0]
        count = nb_client.get_comment_count(story_url)
        logger.debug("Count for %s is %s", story_url, count)
        if count is None:
            continue  # no count retrieved — skip rather than store a null
        db_client.add_comment_count(story_url, count)
        statsd.increment('nb.add_comment_counts.comment_counts_added')
    logger.info('Finished updating comment counts')
Esempio n. 7
0
import warnings

import sqlite3
from datadog import statsd

from connectors.DbConnector import DbConnector
from utility import nb_logging
from utility.NbConfig import NbConfig

import rollbar

logger = nb_logging.setup_logger('SqliteClient')

STATSD_PREFIX = 'nb.SqliteClient.'


class SqliteClient(DbConnector):
    def __init__(self, db_file='nb.sqlite'):
        """Open (or create) the SQLite database.

        :param db_file: path to the SQLite database file; the default
            preserves the previously hard-coded 'nb.sqlite' so existing
            callers are unaffected.
        """
        DbConnector.__init__(self)
        self.conn = sqlite3.connect(db_file)

    def ensure_domains_table_exists(self):
        """Create the `domains` table if it does not already exist.

        One row per story: the URL's domain plus two top-level-domain
        variants; `nb_hash` is unique and references stories(hash).
        """
        create_table_query = '''CREATE TABLE IF NOT EXISTS domains
                 (id INTEGER PRIMARY KEY ASC, nb_hash TEXT UNIQUE,
                 domain TEXT, toplevel TEXT,
                 toplevel_new TEXT, FOREIGN KEY (nb_hash) REFERENCES stories (hash) )'''

        self.execute_wrapper(create_table_query)
        # Commit so the DDL is durable before any inserts follow.
        self.conn.commit()
Esempio n. 8
0
import rollbar
import schedule
import time

from tasks.add_comment_counts import add_comment_counts
from tasks.add_domains import add_domains
from tasks.populate import populate
from tasks.populate import update_hash_list
from tasks.prune import prune_starred

from utility import client_factory
from utility import nb_logging

# NOTE(review): hard-coded Rollbar access token committed to source —
# it should be rotated and loaded via a secret (cf. client_factory.get_secret).
rollbar.init('00b402fc0da54ed1af8687d4c4389911')
logger = nb_logging.setup_logger('app')

# Point datadog's statsd client at the local agent container.
from datadog import initialize
initialize(statsd_host='dd_agent')


def get_config(task):
    """Read the application config from the DB for scheduled task *task*.

    :param task: name of the scheduled task requesting the config (used
        only for the debug trace below).
    :return: the config object produced by the DB client's read_config().
    """
    db_client = client_factory.get_db_client()
    config = db_client.read_config()
    # Restored from the dead commented-out line: keeps `task` useful and
    # matches the active version of this function elsewhere in the project.
    logger.debug('Config for %s: %s', task, config)
    return config


def periodic_update_hash_list():
    """Scheduler entry point: refresh the NewsBlur story-hash list."""
    logger.info('Running scheduled update hash list task')
    update_hash_list()
    logger.info('Finished scheduled update hash list task')
Esempio n. 9
0
from connectors.MySqlClient import MySqlClient
from connectors.NewsblurConnector import NewsblurConnector
from utility import nb_logging
import os

logger = nb_logging.setup_logger('client_factory')
SECRETS_DIR = os.getenv('SECRETS_DIR', '/run/secrets/')


def get_secret(name):
    """Return the whitespace-stripped contents of secret file *name*
    under SECRETS_DIR (Docker-style file-based secrets)."""
    secret_path = os.path.join(SECRETS_DIR, name)
    with open(secret_path, 'r') as secret_file:
        return secret_file.read().strip()


def get_db_client():
    """Build a MySqlClient from file-based secrets.

    :return: a connected MySqlClient.
    :raises OSError: if any of the secret files is missing.
    """
    host = get_secret('DB_HOST')
    user = get_secret('DB_USER')
    password = get_secret('DB_PASS')
    db_name = get_secret('DB_NAME')
    # Lazy %-style args match the logging convention used throughout this
    # codebase and skip formatting entirely when DEBUG is disabled.
    logger.debug('host: %s', host)
    return MySqlClient(host=host, user=user, password=password, db_name=db_name)


def get_newsblur_client():
    """Build a NewsblurConnector using config from the DB plus NB secrets."""
    db_client = get_db_client()
    config = db_client.read_config()
    # Release the DB connection once the config is read, mirroring
    # populate()'s read-then-close pattern; previously it was leaked.
    db_client.close_connection()

    username = get_secret('NB_USERNAME')
    password = get_secret('NB_PASSWORD')

    return NewsblurConnector(config, username, password)
Esempio n. 10
0
from datadog import statsd

from utility import client_factory
from utility import nb_logging

from connectors.NewsblurConnector import NewsblurConnector

logger = nb_logging.setup_logger('add_comment_counts')


# read through DB for rows without comment count, then add it
@statsd.timed('nb.populate.add_comment_counts')
def add_comment_counts():
    """Fill in comment counts for stories that do not yet have one."""
    logger.info('Add comment counts to stories in DB')
    db_client = client_factory.get_db_client()
    pending = db_client.list_stories_without_comment_count()
    logger.debug('Found %s rows', len(pending))

    nb_client = client_factory.get_newsblur_client()
    nb_client.login()

    for story in pending:
        story_url = story[0]
        count = nb_client.get_comment_count(story_url)
        logger.debug("Count for %s is %s", story_url, count)
        if count is None:
            continue  # count unavailable — leave the row untouched
        db_client.add_comment_count(story_url, count)
        statsd.increment('nb.add_comment_counts.comment_counts_added')
    logger.info('Finished adding comment counts')
Esempio n. 11
0
import json

import requests
import requests.exceptions
import rollbar
from bs4 import BeautifulSoup
from datadog import statsd
from ddtrace import patch
from ddtrace import tracer
from time import sleep

from utility import nb_logging

patch(requests=True)

logger = nb_logging.setup_logger('NewsblurConnector')


class NewsblurConnector:

    def __init__(self, config, username, password):
        """Capture config and credentials; login() performs the actual auth."""
        self.config = config
        self.nb_endpoint = config.get('NB_ENDPOINT')
        self.verify = config.get('VERIFY')
        self.credentials = dict(username=username, password=password)
        self.cookies = None  # set by login()

    @statsd.timed('nb.NewsblurConnector.login')
    def login(self):
        """ log in and save cookies """
Esempio n. 12
0
from datadog import statsd
from utility import client_factory
from utility import nb_logging

logger = nb_logging.setup_logger('populate')


@statsd.timed('nb.populate.update_hash_list')
def update_hash_list():
    """Fetch the complete NB story-hash list and store it in the DB."""
    logger.info('Get full list of NB story hashes')

    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
    hashes = nb_client.get_nb_hash_list()
    logger.info('Size of hashlist retrieved from Newsblur is %s', len(hashes))

    client_factory.get_db_client().add_hashes(hashes)


@statsd.timed('nb.populate.populate')
def populate():
    logger.info('Add a row for each HN story')

    db_client = client_factory.get_db_client()
    db_client.ensure_stories_table_exists()
    config = db_client.read_config()
    db_client.close_connection()

    nb_client = client_factory.get_newsblur_client()
    nb_client.login()
Esempio n. 13
0
import warnings

import MySQLdb
from datadog import statsd
from ddtrace import patch_all
from ddtrace import tracer

from connectors.DbConnector import DbConnector
from utility import nb_logging
from utility.NbConfig import NbConfig

import rollbar

# Module-level logger; ddtrace patch_all() auto-instruments supported
# libraries (including MySQLdb) for APM tracing at import time.
logger = nb_logging.setup_logger('MySqlClient')
patch_all()

# Prefix for statsd metric names emitted by this module.
STATSD_PREFIX = 'nb.MySqlClient.'


class MySqlClient(DbConnector):
    def __init__(self, host, user, password, db_name):
        DbConnector.__init__(self)
        self.host = host
        self.user = user
        self.password = password
        self.db_name = db_name

        logger.info("Attempt to connect to DB %s on %s as user %s", db_name, host, user)

        self.conn = MySQLdb.connect(host=self.host,
                                    user=self.user,
Esempio n. 14
0
from ddtrace import patch_all

import time

from datadog import statsd

from connectors.DbConnector import DbConnector
from connectors.dynamo.DomainModel import DomainModel
from connectors.dynamo.StoryModel import StoryModel
from connectors.dynamo.ErrorModel import ErrorModel
from utility import nb_logging

patch_all()
logger = nb_logging.setup_logger('DynamoDbClient')


class DynamoDbClient(DbConnector):
    def add_comment_count(self, comments_url, count):
        """Set the comment count on the first story matching *comments_url*.

        Fixes two defects in the previous version: the loop iterated the
        whole query result and kept the LAST item (the comment said the
        first was wanted), and an empty result raised NameError on `story`.
        """
        story = None
        for story in StoryModel.query(comments_url):
            break  # only want the first element of the result iterator
        if story is None:
            logger.warning('No story found for %s; comment count not saved', comments_url)
            return
        story.comments = count
        story.save()
        statsd.increment('nb.comment_counts_added')

    def add_story(self, nb_hash, added, comments_url, story_url):
        story = StoryModel(comments_url, nb_hash=nb_hash, added=added, url=story_url)
        try:
            story.save()
        except Exception as err:
            logger.error("Caught exception while saving Story model, wait 2 sec and retry")
Esempio n. 15
0
File: app.py Project: bmordue/nb
import time

import rollbar
from ddtrace import patch_all
from ddtrace import tracer

from tasks.add_comment_counts import add_comment_counts
from tasks.add_domains import add_domains
from tasks.populate import populate
from tasks.prune import prune_starred

from utility import client_factory
from utility import nb_logging

patch_all()

rollbar.init('00b402fc0da54ed1af8687d4c4389911')
logger = nb_logging.setup_logger('app')

from datadog import initialize
initialize(statsd_host='dd_agent')

def get_config(task):
    """Read the application config from the DB for scheduled task *task*."""
    config = client_factory.get_db_client().read_config()
    logger.debug('Config for %s: %s', task, config)
    return config


def periodic_populate():
    config = get_config('populate')
    if 'True' == config.get('SHOULD_POPULATE'):
        logger.info('Running scheduled populate task')