def save_tfidf_results(self, job_id):
    """Analyse a job's uploaded files, store the results and email the owner.

    Loads the job document whose ``_id`` is ``ObjectId(job_id)``, runs
    ``samediff.analysis.tf_idf`` and ``samediff.analysis.cosine_similarity``
    on the paths listed under ``filepaths``, stores the results under
    ``tfidf`` and ``cosineSimilarity``, marks the job ``complete``, removes
    the raw input files and mails the job's ``results_url`` to its ``email``.

    Args:
        self: not used by the body (presumably the bound task instance
            supplied by the task queue -- TODO confirm against the decorator).
        job_id: value accepted by ``ObjectId`` identifying the job document.

    Raises:
        ValueError: if no job document with that id exists.
    """
    db_collection = _get_db_collection()
    job_info = db_collection.find_one({'_id': ObjectId(job_id)})
    logger.info("Job %s: TF-IDF", job_id)
    if job_info is None:
        # find_one returns None for an unknown id; fail with a clear message
        # instead of a TypeError on the first subscript below.
        raise ValueError("Job %s: no such job in the database" % job_id)

    # tfidf them
    filepaths = job_info['filepaths']
    tfidf = samediff.analysis.tf_idf(filepaths)
    cosine_similarity = samediff.analysis.cosine_similarity(filepaths)

    # save the results back to the db (based on the job_number); the stored
    # document no longer lists the raw input files
    job_info['tfidf'] = tfidf
    job_info['cosineSimilarity'] = cosine_similarity
    del job_info['filepaths']
    job_info['status'] = 'complete'
    # NOTE(review): Collection.save is deprecated in newer PyMongo releases;
    # kept as-is because the installed version is not visible from here.
    db_collection.save(job_info)
    # TODO: catch any exceptions and queue them up for retry attempts

    # delete the raw input files only after the results are safely stored, so
    # a failed save does not destroy the inputs; a file that cannot be removed
    # is logged rather than aborting the already-completed job
    for path in filepaths:
        try:
            os.remove(path)
        except OSError:
            logger.warning("Job %s: could not delete input file %s",
                           job_id, path, exc_info=True)

    # notify them with email
    # TODO: Internationalize and put the text stuff into some kind of templating structure
    name = job_info['email'].split('@')[0]  # local part of the address doubles as display name
    body = u'Dear %s, \n\nYour SameDiff job is ready at this URL: %s! \n\nSincerely, \n %s ' % (
        name, job_info["results_url"], settings.get('mail', 'from_name'))
    envelope = Envelope(
        from_addr=(settings.get('mail', 'from_email'), settings.get('mail', 'from_name')),
        to_addr=(job_info['email'], name),
        subject=u'Your SameDiff job is ready!',
        text_body=body)
    envelope.send('mail.gandi.net', login=settings.get('mail', 'login'),
                  password=settings.get('mail', 'password'), tls=True)
app.config["UPLOADED_DOCS_DEST"] = TEMP_DIR
# The trailing comma matters: ("txt") is just the string "txt", which would
# turn the extension membership check into a substring match (allowing e.g.
# "t" or "xt"); ("txt",) is the intended one-element tuple.
docs = UploadSet(name="docs", extensions=("txt",))
configure_uploads(app, (docs,))
patch_request_class(app, 4 * 1024 * 1024)  # 4MB

# setup logging
logging.basicConfig(filename=os.path.join(base_dir, "../", "samediff.log"), level=logging.DEBUG)
logger = logging.getLogger(__name__)
logger.info("Temp Dir is %s", TEMP_DIR)

# database handles are attached to the app object
db_client = MongoClient(get_mongo_uri())
app.db = db_client[settings.get("db", "name")]
app.db_collection = app.db[settings.get("db", "collection")]


@app.before_request
def before():
    """Validate the ``lang_code`` URL argument and move it onto ``g``.

    When the matched route carries a ``lang_code`` view argument, anything
    other than ``"es"`` or ``"en"`` aborts with a 404. A valid code is stored
    as ``g.current_lang`` and removed from ``request.view_args``.
    """
    if request.view_args and "lang_code" in request.view_args:
        if request.view_args["lang_code"] not in ("es", "en"):
            return abort(404)  # bail on invalid language
        g.current_lang = request.view_args["lang_code"]
        request.view_args.pop("lang_code")


@babel.localeselector
def get_locale():
    """Return the language stored on ``g`` for this request, defaulting to ``"en"``."""
    return g.get("current_lang", "en")
from __future__ import absolute_import import os from celery import Celery from samediff import settings celery_app = Celery('samediff', broker=settings.get('queue','broker_url'), backend=settings.get('queue','backend_url'), include=['samediff.tasks']) # expire backend results in one hour celery_app.conf.update( CELERY_TASK_RESULT_EXPIRES=3600, ) if __name__ == '__main__': celery_app.start()
def _get_db_collection():
    """Return the configured MongoDB collection via a newly created client.

    Every call builds a fresh ``MongoClient`` from ``samediff.get_mongo_uri()``
    and looks up the database and collection named by the ``db`` section of
    the settings (keys ``name`` and ``collection``).
    """
    client = MongoClient(samediff.get_mongo_uri())
    return client[settings.get('db', 'name')][settings.get('db', 'collection')]