Ejemplo n.º 1
0
from __future__ import absolute_import

from celery import Celery
from mediameter import settings

# Broker and result-backend URLs are read from the "queue" section of the
# project settings before the application object is built.
_queue_broker_url = settings.get("queue", "broker_url")
_queue_backend_url = settings.get("queue", "backend_url")

# Celery application named "mediameter"; the include list names the
# mediameter.tasks module.
app = Celery(
    "mediameter",
    broker=_queue_broker_url,
    backend=_queue_backend_url,
    include=["mediameter.tasks"],
)

# Expire backend results after one hour (60 * 60 seconds).
# NOTE(review): newer Celery releases spell this setting `result_expires`;
# confirm against the installed Celery version before renaming.
app.conf.update(CELERY_TASK_RESULT_EXPIRES=60 * 60)

if __name__ == "__main__":
    # Only start the app when this module is executed as a script.
    app.start()
# NOTE(review): presumably a batch size for CoreNLP queries; it is not used in
# the visible part of the script, so confirm at the point of use.
CORE_NLP_QUERY_STORY_COUNT = 200

# Absolute directory of this script; logging.json is looked up next to it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Load the logging configuration from JSON and apply it via dictConfig.
_logging_config_path = os.path.join(current_dir, "logging.json")
with open(_logging_config_path) as f:
    logging_config = json.loads(f.read())
logging.config.dictConfig(logging_config)

log = logging.getLogger(__name__)
# Visual separator written to the log at the start of the script.
log.info("---------------------------------------------------------------------------")
start_time = time.time()

# Raise the threshold of the "requests" logger to WARNING.
requests_logger = logging.getLogger("requests")
requests_logger.setLevel(logging.WARNING)

# Fetch parameters from the "mediacloud" section of the project settings.
stories_to_fetch = settings.get("mediacloud", "stories_per_fetch")
content_to_use = settings.get("mediacloud", "content")
_fetch_banner = "Fetching {} stories by page (in {} format) from MediaCloud to geocode"
log.info(_fetch_banner.format(stories_to_fetch, content_to_use))

# load the relevant settings
topic_id = settings.get("mediacloud", "topic_id")
log.info("  topic_id: {}".format(topic_id))

# Initial state; presumably consumed by a fetch loop later in the file.
next_link_id = None
story_time = content_time = None
more_stories = True
Ejemplo n.º 3
0
# NOTE(review): presumably the number of stories per CoreNLP query; not used in
# the visible part of the script, so confirm where it is consumed.
CORE_NLP_QUERY_STORY_COUNT = 200

# Resolve the directory that holds this script.
_script_path = os.path.abspath(__file__)
current_dir = os.path.dirname(_script_path)

# Read logging.json from the script directory and hand it to dictConfig.
with open(os.path.join(current_dir, "logging.json")) as f:
    logging_config = json.loads(f.read())
logging.config.dictConfig(logging_config)

log = logging.getLogger(__name__)
# Separator line marking the start of the script in the log.
log.info("---------------------------------------------------------------------------")
start_time = time.time()

# Only WARNING and above from the "requests" logger.
requests_logger = logging.getLogger("requests")
requests_logger.setLevel(logging.WARNING)

# How many stories to fetch and which content format to use, per the
# "mediacloud" settings section.
stories_to_fetch = settings.get("mediacloud", "stories_per_fetch")
content_to_use = settings.get("mediacloud", "content")
_fetch_summary = "Fetching {} stories (in {} format) from MediaCloud to geocode".format(
    stories_to_fetch, content_to_use
)
log.info(_fetch_summary)

# load the relevant settings
solr_filter = settings.get("mediacloud", "solr_filter")
log.info("  query: {}".format(solr_filter))
last_processed_stories_id = settings.get("mediacloud", "last_processed_stories_id")
_resume_message = "  starting at stories_processed_id {}".format(last_processed_stories_id)
log.info(_resume_message)

# Both start out unset; they are assigned later in the file (not visible here).
story_time = content_time = None

if content_to_use == CONTENT_NLP:

    # Fetch some story ids and queue them up to get NLP results