def evaluateDocument(uv_task):
    """Load the task's document and queue the follow-up tasks for its mime type.

    The document is fetched by ``uv_task.doc_id`` when the task has that
    attribute; otherwise it is inflated from ``uv_task.file_name``.  The
    task is failed when it carries neither attribute, and failed with
    status 412 when the document's mime type has no entry in
    ``MIME_TYPE_TASKS``.

    Args:
        uv_task: the task being run.  Its ``setStatus``, ``fail``,
            ``put_next``, ``routeNext`` and ``finish`` methods are called
            and its ``task_path`` attribute is read.

    Returns:
        None.
    """
    task_tag = "DOCUMENT EVALUATION"
    print("\n\n************** %s [START] ******************\n" % task_tag)

    uv_task.setStatus(302)

    def rejectDocument():
        # Shared failure path for "there is no usable document".
        print("\n\n************** %s [INVALID] ******************\n" % task_tag)
        print("DOCUMENT INVALID (is None)")
        uv_task.fail(message="DOCUMENT INVALID (is none)")

    has_doc_id = hasattr(uv_task, "doc_id")
    if not has_doc_id and not hasattr(uv_task, "file_name"):
        # Nothing identifies a document: fail the task instead of letting
        # the attribute access below raise AttributeError.
        rejectDocument()
        return

    from lib.Worker.Models.uv_document import UnveillanceDocument
    from conf import DEBUG
    from vars import MIME_TYPE_TASKS, MIME_TYPES

    if has_doc_id:
        if DEBUG:
            print("GETTING A DOCUMENT FROM ID: %s" % uv_task.doc_id)

        document = UnveillanceDocument(_id=uv_task.doc_id)
    else:
        if DEBUG:
            print("INFLATING NEW DOCUMENT WITH FILE NAME: %s" % uv_task.file_name)

        document = UnveillanceDocument(inflate={'file_name': uv_task.file_name})

    if document is None:
        rejectDocument()
        return

    document.addCompletedTask(uv_task.task_path)
    uv_task.put_next(uv_task.task_path)

    mime_type = document.query_mime_type()
    print("\n\n************** %s [INFO] ******************\n" % task_tag)
    print("MIME TYPE: %s" % mime_type)

    if mime_type not in MIME_TYPE_TASKS:
        uv_task.fail(status=412,
            message="document mime type (%s) not important" % mime_type)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        return

    if DEBUG:
        print("mime type (%s) usable..." % mime_type)
        print(MIME_TYPE_TASKS[mime_type])

    uv_task.put_next(MIME_TYPE_TASKS[mime_type])

    inflate = {'doc_id': document._id}
    if mime_type == MIME_TYPES['symlink']:
        # Symlinked documents additionally ask the next task to sync.
        inflate['attempt_sync'] = True

    uv_task.routeNext(inflate=inflate)
    uv_task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)
def routeNextTask(task, document, task_extras=None):
    """Create and run the task that should follow ``task``, if there is one.

    Nothing happens when ``task.no_continue`` is set to a true value.
    Otherwise the next task path is ``task.next_task_path`` when the task
    has that attribute, or else the second entry (index 1) of
    ``MIME_TYPE_TASKS[document.mime_type]``.  When no path can be
    determined, no task is started.

    Args:
        task: the task that just ran; ``task.queue`` is passed on to the
            new task.
        document: the document being processed; ``document._id`` is passed
            on as ``doc_id``.
        task_extras: optional dict merged into the new task's inflate
            values (it may override the defaults).

    Returns:
        None.
    """
    if getattr(task, 'no_continue', False):
        return

    from lib.Worker.Models.uv_task import UnveillanceTask

    next_task_path = None

    if hasattr(task, 'next_task_path'):
        next_task_path = task.next_task_path
    else:
        # DEBUG is imported here because the handler below reads it; without
        # this import a failed lookup would raise NameError instead of
        # being reported.
        from conf import DEBUG
        from vars import MIME_TYPE_TASKS

        if document.mime_type in MIME_TYPE_TASKS:
            try:
                next_task_path = MIME_TYPE_TASKS[document.mime_type][1]
            except Exception as e:
                # Best effort: a mime type without a second task simply
                # has nothing to route to.
                if DEBUG:
                    print(e)

    if next_task_path is None:
        return

    inflate = {
        'task_path': next_task_path,
        'doc_id': document._id,
        'queue': task.queue
    }

    if task_extras is not None:
        inflate.update(task_extras)

    next_task = UnveillanceTask(inflate=inflate)
    next_task.run()
def evaluateText(task):
    """Classify a document's text as JSON or plain text and index plain text.

    The text is loaded from the asset ``task.text_file`` when the task has
    that attribute, otherwise from the document's own file.  If it parses
    as JSON the document's mime type is set to ``application/json`` and
    saved.  Otherwise the text is split into pages of cleaned lines, stored
    as the ``doc_texts.json`` asset, and wrapped in an ``UnveillanceText``
    whose id is recorded on the document.

    Args:
        task: the task being run.  ``task.doc_id`` and ``task.task_path``
            are read; ``setStatus``, ``fail``, ``finish`` and ``routeNext``
            are called.

    Returns:
        None.
    """
    task_tag = "TEXT EVALUATION"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("evaluating text at %s" % task.doc_id)
    task.setStatus(302)

    import json

    from lib.Worker.Models.uv_document import UnveillanceDocument
    from conf import DEBUG

    document = UnveillanceDocument(_id=task.doc_id)

    # limited choices: json, pgp, or txt
    if hasattr(task, "text_file"):
        content = document.loadAsset(task.text_file)
    else:
        content = document.loadFile(document.file_name)

    if content is None:
        print("no text to evaluate :(")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        task.fail()
        return

    new_mime_type = None

    try:
        json.loads(content)
        new_mime_type = "application/json"

        print("THIS IS JSON")
    except Exception as e:
        # Any parse failure just means the content is not JSON.
        print("NOT JSON: %s" % e)

    if new_mime_type is not None:
        document.mime_type = new_mime_type
        document.save()
        # NOTE(review): earlier code looked up MIME_TYPE_TASKS[mime_type][0]
        # here but never used the result; routing is left to
        # task.routeNext() below.  Confirm whether the JSON task was meant
        # to be queued at this point.
    else:
        try:
            from lib.Core.Utils.funcs import cleanLine
            from vars import ASSET_TAGS

            # this is arbitrary
            MAX_LINES_PER_PAGE = 80

            cleaned_lines = [cleanLine(line) for line in content.splitlines()]
            txt_json = _paginateLines(cleaned_lines, MAX_LINES_PER_PAGE)

            document.total_pages = len(txt_json)
            document.save()

            asset_path = document.addAsset(txt_json, "doc_texts.json", as_literal=False,
                description="jsonified text of original document, segment by segment",
                tags=[ASSET_TAGS['TXT_JSON']])

            from lib.Worker.Models.uv_text import UnveillanceText
            uv_text = UnveillanceText(inflate={
                'media_id': document._id,
                'searchable_text': txt_json,
                'file_name': asset_path
            })

            document.text_id = uv_text._id
            document.save()
        except Exception as e:
            # Best effort: a failure to index the text does not fail the task.
            if DEBUG:
                print("ERROR HERE GENERATING DOC TEXTS:")
                print(e)

    document.addCompletedTask(task.task_path)
    task.finish()
    task.routeNext()
    print("\n\n************** %s [END] ******************\n" % task_tag)


def _paginateLines(lines, lines_per_page):
    """Join ``lines`` into space-separated pages of at most ``lines_per_page`` lines.

    An empty ``lines`` list yields a single empty page.  A line count that
    is an exact multiple of ``lines_per_page`` does not produce a trailing
    empty page.
    """
    pages = [
        " ".join(lines[start:start + lines_per_page])
        for start in range(0, len(lines), lines_per_page)
    ]
    return pages or [""]
def decrypt(uv_task):
    """Decrypt the task's PGP file with gpg and route the decrypted result.

    The file ``uv_task.pgp_file`` (relative to ``ANNEX_DIR``) is decrypted
    to ``uv_task.save_as`` when the task has that attribute, otherwise to
    ``"<pgp_file>.decrypted"``.  The passphrase and gpg home directory come
    from ``getSecrets``.  The task is failed when the PGP file cannot be
    fetched, when no passphrase is configured, or (status 412) when gpg
    exits with a non-zero status.

    If the task has no next task queued, the decrypted file's mime type is
    looked up in ``MIME_TYPE_TASKS`` and the matching tasks are queued
    before routing on.

    Args:
        uv_task: the task being run.  ``doc_id``, ``pgp_file``,
            ``task_path`` and optionally ``save_as`` are read;
            ``setStatus``, ``fail``, ``get_next``, ``put_next``,
            ``routeNext`` and ``finish`` are called.

    Returns:
        None.
    """
    task_tag = "DECRYPTING"
    print("\n\n************** %s [START] ******************\n" % task_tag)
    print("decrypting pgp blob for %s" % uv_task.doc_id)
    uv_task.setStatus(302)

    from lib.Worker.Models.uv_document import UnveillanceDocument

    media = UnveillanceDocument(_id=uv_task.doc_id)
    if media is None:
        print("DOC IS NONE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    if not media.getFile(uv_task.pgp_file):
        print("NO PGP FILE")
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail()
        return

    from conf import getSecrets
    gpg_pwd = getSecrets("gpg_pwd")
    if gpg_pwd is None:
        err_msg = "NO PASSPHRASE TO DECRYPT"
        print(err_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(message=err_msg)
        return

    gpg_dir = getSecrets("gpg_dir")

    # save as task.pgp_file.decrypted or whatever
    import os
    try:
        from shlex import quote
    except ImportError:
        # Python 2: the same helper lives in the pipes module.
        from pipes import quote

    from fabric.api import local, settings
    from fabric.context_managers import hide
    from conf import ANNEX_DIR

    if not hasattr(uv_task, "save_as"):
        save_as = "%s.decrypted" % uv_task.pgp_file
    else:
        save_as = uv_task.save_as

    print("\n\n************** %s [INFO] ******************\n" % task_tag)
    print("SAVING DECRYPTED ASSET TO %s IF SUCCESSFUL" % save_as)

    decrypted_path = os.path.join(ANNEX_DIR, save_as)
    encrypted_path = os.path.join(ANNEX_DIR, uv_task.pgp_file)

    with settings(hide("everything"), warn_only=True):
        # Every interpolated value is shell-quoted so that spaces or shell
        # metacharacters in the passphrase or file names cannot break or
        # inject into the command line.
        # NOTE(review): the passphrase is still passed as a command-line
        # argument, which other local users may be able to see in the
        # process list; consider gpg's --passphrase-fd/--passphrase-file.
        d_cmd = "gpg --yes --no-tty --homedir=%s --passphrase %s --output %s --decrypt %s" % tuple(
            quote("%s" % value)
            for value in (gpg_dir, gpg_pwd, decrypted_path, encrypted_path)
        )

        decrypted = local(d_cmd)
        print(decrypted.return_code)

    del gpg_pwd

    # gpg exits with 0 only when everything was fine; any other status
    # (not just 2) means the decrypted output cannot be trusted.
    if decrypted.return_code != 0:
        err_msg = "could not successfully decrypt %s" % uv_task.pgp_file
        print(err_msg)
        print("\n\n************** %s [ERROR] ******************\n" % task_tag)
        uv_task.fail(status=412, message=err_msg)
        return

    media.addCompletedTask(uv_task.task_path)

    if uv_task.get_next() is None:
        # route according to mime type
        # get mime type of decrypted
        from vars import MIME_TYPE_TASKS
        from lib.Worker.Utils.funcs import getFileType

        mime_type = getFileType(decrypted_path)

        # usable: json (a j3m), zip (a source or a log->batch)
        if mime_type in MIME_TYPE_TASKS:
            print("mime type (%s) usable..." % mime_type)

            try:
                uv_task.put_next(MIME_TYPE_TASKS[mime_type])
            except Exception as e:
                # Best effort: routing continues even if queueing fails.
                print(e)

    uv_task.routeNext(inflate={"file_name": save_as})
    uv_task.finish()
    print("\n\n************** %s [END] ******************\n" % task_tag)