Beispiel #1
0
def save_tfidf_results(self, job_id):
    """Analyse a job's uploaded files, store the results and email the owner.

    Loads the job document whose ``_id`` is ``job_id``, runs
    ``samediff.analysis.tf_idf`` and ``samediff.analysis.cosine_similarity``
    over the job's ``filepaths``, writes both results back onto the document
    (keys ``tfidf`` and ``cosineSimilarity``), marks it ``complete`` and
    finally mails the address stored under ``email`` a link to
    ``results_url``.

    Args:
        self: unused here; presumably the bound task instance -- confirm
            against the decorator that registers this function.
        job_id: identifier of the job document; passed to ``ObjectId``.
    """
    db_collection = _get_db_collection()
    job_info = db_collection.find_one({'_id': ObjectId(job_id)})
    # Lazy logger arguments: the message is only formatted if it is emitted.
    logger.info("Job %s: TF-IDF", job_id)
    # tfidf them
    filepaths = job_info['filepaths']
    tfidf = samediff.analysis.tf_idf(filepaths)
    cosine_similarity = samediff.analysis.cosine_similarity(filepaths)
    # save the results back to the db (based on the job_number)
    job_info['tfidf'] = tfidf
    job_info['cosineSimilarity'] = cosine_similarity
    del job_info['filepaths']
    job_info['status'] = 'complete'
    # NOTE(review): Collection.save() is deprecated in PyMongo 3.x and removed
    # in 4.0 -- confirm the pinned driver version before upgrading.
    db_collection.save(job_info)
    # TODO: catch any exceptions and queue them up for retry attempts

    # delete the raw input files -- only once the results are persisted, so a
    # failed save does not destroy the inputs a retry would need
    for path in filepaths:
        try:
            os.remove(path)
        except OSError:
            # Cleanup is best effort: a missing or undeletable temp file must
            # not fail a job whose results are already stored.
            logger.warning("Job %s: could not delete input file %s",
                           job_id, path, exc_info=True)

    # notify them with email
    # TODO: Internationalize and put the text stuff into some kind of templating structure
    name = job_info['email'].split('@')[0]  # local part of the address doubles as the greeting name
    body = u'Dear %s, \n\nYour SameDiff job is ready at this URL: %s! \n\nSincerely, \n %s ' % ( name , job_info["results_url"], settings.get('mail','from_name'))
    envelope = Envelope(
        from_addr=(settings.get('mail','from_email'), settings.get('mail','from_name')),
        to_addr=(job_info['email'], name),
        subject=u'Your SameDiff job is ready!',
        text_body=body)
    envelope.send('mail.gandi.net', login=settings.get('mail','login'),
              password=settings.get('mail','password'), tls=True)
Beispiel #2
0
# Uploaded documents are written to the temp directory.
app.config["UPLOADED_DOCS_DEST"] = TEMP_DIR

# Only plain-text uploads are accepted. The trailing comma matters: ("txt") is
# just the string "txt", and a membership test against a string is a substring
# test, which would also let through extensions such as "t", "x" or "tx".
docs = UploadSet(name="docs", extensions=("txt",))

configure_uploads(app, (docs,))
patch_request_class(app, 4 * 1024 * 1024)  # 4MB

# setup logging
logging.basicConfig(filename=os.path.join(base_dir, "../", "samediff.log"), level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Lazy logger arguments: the message is only formatted if it is emitted.
logger.info("Temp Dir is %s", TEMP_DIR)

# Database handles are attached to the app object so other code can reach them.
db_client = MongoClient(get_mongo_uri())
app.db = db_client[settings.get("db", "name")]
app.db_collection = app.db[settings.get("db", "collection")]


@app.before_request
def before():
    """Move a ``lang_code`` route argument onto ``g.current_lang``.

    Requests whose route carries no ``lang_code`` are left untouched. A
    ``lang_code`` other than "es" or "en" aborts the request with a 404.
    Otherwise the value is stored on ``g`` and removed from
    ``request.view_args``.
    """
    route_args = request.view_args
    if not route_args or "lang_code" not in route_args:
        return None
    if route_args["lang_code"] not in ("es", "en"):
        return abort(404)  # bail on invalid language
    # Remember the language and strip it from the view arguments in one step.
    g.current_lang = route_args.pop("lang_code")


@babel.localeselector
def get_locale():
    """Return the language stored on ``g.current_lang``, or "en" if unset."""
    fallback_lang = "en"
    return g.get("current_lang", fallback_lang)
Beispiel #3
0
from __future__ import absolute_import
import os
from celery import Celery
from samediff import settings

# Celery application for SameDiff: broker and result backend URLs are read from
# the 'queue' settings section, and the task module is registered up front.
celery_app = Celery(
    'samediff',
    broker=settings.get('queue', 'broker_url'),
    backend=settings.get('queue', 'backend_url'),
    include=['samediff.tasks'],
)

# expire backend results in one hour (value is in seconds)
celery_app.conf.update(CELERY_TASK_RESULT_EXPIRES=60 * 60)

# Start the Celery app when this module is executed as a script.
if __name__ == '__main__':
    celery_app.start()
Beispiel #4
0
def _get_db_collection():
    """Return the configured MongoDB collection from a new client.

    A new ``MongoClient`` is created on every call, using the URI from
    ``samediff.get_mongo_uri()``; the database and collection names are read
    from the 'db' settings section.
    """
    client = MongoClient(samediff.get_mongo_uri())
    return client[settings.get('db', 'name')][settings.get('db', 'collection')]