"""Celery application for the mediameter task queue.

Builds the shared ``app`` instance from the broker and result-backend URLs
found in the ``queue`` section of the project settings, and registers the
task module ``mediameter.tasks`` with it.
"""
# NOTE(review): absolute imports are presumably needed because this module
# shares its name with the third-party ``celery`` package -- confirm against
# the actual filename.
from __future__ import absolute_import

from celery import Celery

from mediameter import settings

# Stored task results are expired after one hour (value is in seconds).
_RESULT_EXPIRES_SECONDS = 60 * 60

# Connection URLs are read from the "queue" section of the settings.
_broker_url = settings.get("queue", "broker_url")
_backend_url = settings.get("queue", "backend_url")

app = Celery(
    "mediameter",
    broker=_broker_url,
    backend=_backend_url,
    include=["mediameter.tasks"],
)
app.conf.update(CELERY_TASK_RESULT_EXPIRES=_RESULT_EXPIRES_SECONDS)

if __name__ == "__main__":
    # Running this module directly hands control to Celery's own entry point.
    app.start()
# NOTE(review): presumably the number of stories sent per CoreNLP query; it is
# not referenced in the visible part of the script -- confirm against its use.
CORE_NLP_QUERY_STORY_COUNT = 200

# Configure logging from the logging.json file that sits next to this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
logging_config_path = os.path.join(current_dir, "logging.json")
with open(logging_config_path) as config_file:
    logging_config = json.load(config_file)
logging.config.dictConfig(logging_config)

log = logging.getLogger(__name__)
log.info("---------------------------------------------------------------------------")
# Wall-clock reference taken right after the separator line is logged.
start_time = time.time()

# Only warnings and above are let through from the "requests" logger.
requests_logger = logging.getLogger("requests")
requests_logger.setLevel(logging.WARNING)

# Fetch parameters come from the "mediacloud" section of the settings.
stories_to_fetch = settings.get("mediacloud", "stories_per_fetch")
content_to_use = settings.get("mediacloud", "content")
fetch_banner = "Fetching {} stories by page (in {} format) from MediaCloud to geocode".format(
    stories_to_fetch, content_to_use
)
log.info(fetch_banner)

# load the relevant settings
topic_id = settings.get("mediacloud", "topic_id")
log.info(" topic_id: {}".format(topic_id))

# Initial state for the rest of the script; presumably consumed by a paging
# loop that follows outside this excerpt -- TODO confirm.
next_link_id = story_time = content_time = None
more_stories = True
CORE_NLP_QUERY_STORY_COUNT = 200 current_dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(current_dir,'logging.json'), 'r') as f: logging_config = json.load(f) logging.config.dictConfig(logging_config) log = logging.getLogger(__name__) log.info("---------------------------------------------------------------------------") start_time = time.time() requests_logger = logging.getLogger('requests') requests_logger.setLevel(logging.WARN) stories_to_fetch = settings.get('mediacloud','stories_per_fetch') content_to_use = settings.get('mediacloud','content') log.info("Fetching {} stories (in {} format) from MediaCloud to geocode".format(stories_to_fetch,content_to_use) ) # load the relevant settings solr_filter = settings.get('mediacloud','solr_filter') log.info(" query: {}".format(solr_filter)) last_processed_stories_id = settings.get('mediacloud','last_processed_stories_id') log.info(" starting at stories_processed_id {}".format(last_processed_stories_id) ) story_time = None content_time = None if content_to_use == CONTENT_NLP: # Fetch some story ids and queue them up to get NLP results