Exemple #1
0
def search_email_by_community(*args, **param_args):
    """Search the emails that belong to one community.

    The community is read from the first positional argument via
    ``nth(args, 0, '')``; all other inputs come from the request parameters
    in ``param_args`` and are decoded by the ``parseParam*`` helpers.

    Returns whatever ``es_get_all_email_by_community`` returns, or a
    ``tangelo.HTTPStatusCode`` 400 when ``data_set_id`` or the community is
    missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_community(args: %s kwargs: %s)" %
                (str(args), str(param_args)))

    # The size returned here is overridden just below, so it is not kept.
    data_set_id, start_datetime, end_datetime, _ = parseParamDatetime(
        **param_args)
    community = nth(args, 0, '')

    # TODO: set from UI
    size = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")
    if not community:
        # The message previously said "missing sender", which did not match
        # the argument actually being validated.
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing community")

    email_addrs = parseParam_email_addr(**param_args)

    qs = parseParamTextQuery(**param_args)

    return es_get_all_email_by_community(data_set_id, community, email_addrs,
                                         qs, start_datetime, end_datetime,
                                         size)
Exemple #2
0
def post(*arg, **kwarg):
    '''
    Main listener for GitHub webhooks.

    Checks the request body against the ``X-Hub-Signature`` header using
    HMAC-SHA1 keyed with ``_secret_key``, parses the body as JSON and hands
    ``push`` events to ``add_push``.

    Returns 'OK' on success, otherwise a ``tangelo.HTTPStatusCode``: 403 for
    a signature mismatch, 400 for a body that is not valid JSON or for any
    event other than ``push`` (including a request with no event header).
    '''

    # retrieve the headers from the request
    # headers = tangelo.request_headers() # <- not merged
    headers = cherrypy.request.headers

    # read the raw request body; the signature is computed over exactly
    # this text
    body = tangelo.request_body()
    s = body.read()

    # make sure this is a valid request coming from github
    computed_hash = hmac.new(str(_secret_key), s, hashlib.sha1).hexdigest()
    # strip the leading "sha1=" (5 characters) from the header value
    received_hash = headers.get('X-Hub-Signature', 'sha1=')[5:]
    if not hmac.compare_digest(computed_hash, received_hash):
        return tangelo.HTTPStatusCode(403, "Invalid signature")

    try:
        obj = json.loads(s)
    except ValueError:
        # json reports malformed input as ValueError; anything else is a
        # genuine programming error and should not be masked as a 400.
        return tangelo.HTTPStatusCode(400, "Could not load json object.")

    # .get() so a request without the event header is rejected as an
    # unhandled event instead of raising KeyError.
    if headers.get('X-Github-Event') != 'push':
        return tangelo.HTTPStatusCode(400, "Unhandled event")

    # add a new item to the test queue
    add_push(obj)

    return 'OK'
Exemple #3
0
def search_email_by_topic(*args, **param_args):
    """Search emails by topic.

    All inputs are taken from the request parameters in ``param_args``.
    Responds with a ``tangelo.HTTPStatusCode`` 400 when ``data_set_id`` or
    ``topic_index`` is absent; otherwise returns the result of
    ``es_get_all_email_by_topic``.
    """
    tangelo.content_type("application/json")
    call_summary = (str(args), str(param_args))
    tangelo.log("search_email_by_topic(args: %s kwargs: %s)" % call_summary)

    data_set_id, date_from, date_to, size = parseParamDatetime(**param_args)

    # TODO: set from UI
    size = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")

    topic_index = param_args.get("topic_index")
    if not topic_index:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing topic_index")

    # Helpers are invoked in the same order as before: topic, addresses,
    # then the text query.
    search_options = {"topic": parseParamTopic(**param_args)}
    search_options["email_addrs"] = parseParam_email_addr(**param_args)
    search_options["qs"] = parseParamTextQuery(**param_args)
    search_options["start_datetime"] = date_from
    search_options["end_datetime"] = date_to
    search_options["size"] = size

    return es_get_all_email_by_topic(data_set_id, **search_options)
Exemple #4
0
def seedSearch(*args):
    """Seed the daemon with a starting email and return the id it reports.

    The first positional argument is the URL-quoted email id. The function
    resolves it to a line number with ``findLineNum``, requests
    ``<daemon_url>/firstemail/<line>`` and then ``<daemon_url>/getNextEmail``,
    and maps the daemon's answer back through ``findEmailId``.

    Returns a ``tangelo.HTTPStatusCode`` 400 when the email id is missing or
    the daemon gives no starting point.
    """
    # Pass an explicit '' default, as the sibling handlers do, so a missing
    # argument reaches the guard below instead of being handed to unquote()
    # as a non-string. NOTE(review): assumes nth() yields its default when
    # the index is absent - confirm against its definition.
    email_id = urllib.unquote(nth(args, 0, ''))
    if not email_id:
        return tangelo.HTTPStatusCode(400, "missing argument email_id")
    line_num = findLineNum(email_id)
    seed_url = "{0}/firstemail/{1}".format(daemon_url, line_num)
    # The response of the seeding request is not used.
    request(seed_url)
    next_url = "{0}/getNextEmail".format(daemon_url)
    start_point = request(next_url)
    if not start_point:
        return tangelo.HTTPStatusCode(400, "failed to set starting email")
    return findEmailId(start_point)
Exemple #5
0
def advanced_query(args, kwargs):
    """Run a free-form query given as ``kwargs['q']``.

    ``args`` must hold exactly one element, the requested representation.
    A query containing ``~slr~`` is converted through the helium query path;
    any other query is converted directly to a mongo query.

    Raises ``cherrypy.HTTPError(400)`` for a malformed call and returns a
    ``tangelo.HTTPStatusCode`` 400 when the query text is invalid.
    """
    if not (len(args) == 1 and 'q' in kwargs):
        raise cherrypy.HTTPError(400)

    raw_query = kwargs['q']
    try:
        if '~slr~' not in raw_query:
            mongo_query = query.to_mongo_query(raw_query)
        else:
            mongo_query = helium_query_to_mongo_query(
                query.to_helium_query(raw_query))
    except query.InvalidQuery:
        return tangelo.HTTPStatusCode(400, 'Invalid query')

    representation = args[0]
    projection = generate_mongo_projection(representation)
    max_results = getlimit(kwargs)

    if representation == 'cjson':
        # For cjson, additionally require both fields to exist.
        has_structure = {'3dStructure': {'$exists': 1}}
        has_diagram = {'diagram': {'$exists': 1}}
        complete_data = {'$and': [has_structure, has_diagram]}
        mongo_query = {'$and': [complete_data, mongo_query]}

    return execute_query(mongo_query, projection, representation, max_results)
Exemple #6
0
def get(*pargs, **kwargs):
    """Dispatch a GET request on the number of positional path arguments.

    Three arguments go to ``basic_query``, one goes to ``advanced_query``;
    any other count yields a ``tangelo.HTTPStatusCode`` 400.
    """
    argc = len(pargs)
    if argc not in (1, 3):
        return tangelo.HTTPStatusCode(400, "Invalid request")
    if argc == 1:
        return advanced_query(pargs, kwargs)
    return basic_query(pargs, kwargs)
Exemple #7
0
def get_top_email_by_text_query(data_set_id, qs, start_datetime, end_datetime,
                                size):
    """Build the email graph for a text query and attach matching attachments.

    Returns a ``tangelo.HTTPStatusCode`` 400 when ``qs`` is empty. Otherwise
    runs one email query and one attachments-only query over the same date
    bounds and returns the graph with an added ``"attachments"`` entry.
    """
    if not qs:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing search term(s)")

    date_range = (start_datetime, end_datetime)

    email_query = _build_email_query(qs=qs, date_bounds=date_range)
    tangelo.log("es_search.get_graph_for_text_query(query: %s)" % email_query)

    hits = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Second pass: the same search restricted to attachments.
    attachment_query = _build_email_query(qs=qs,
                                          date_bounds=date_range,
                                          attachments_only=True)
    tangelo.log("es_search.get_top_email_by_text_query(attachment-query: %s)" %
                attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, size,
                                                    attachment_query)

    return graph
Exemple #8
0
def search_email_by_conversation(*path_args, **param_args):
    """Return the conversation surrounding a document.

    All inputs come from the request parameters in ``param_args``;
    ``document_datetime`` is mandatory. Half of the requested ``size``
    (default 2500) is passed on to ``es_get_conversation``.

    Returns the result of ``es_get_conversation``, or a
    ``tangelo.HTTPStatusCode`` 400 when ``size`` is not an integer or
    ``document_datetime`` is missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search_email_by_conversation(path_args[%s] %s)" %
                (len(path_args), str(path_args)))

    # The size returned here is overridden just below, so it is not kept.
    data_set_id, start_datetime, end_datetime, _ = parseParamDatetime(
        **param_args)
    # TODO: set from UI
    size = param_args.get('size', 2500)

    # A size taken from the request is not guaranteed to be a number, and
    # "/" is not integer division on every interpreter. Convert explicitly
    # and use floor division so the halved value is always an int.
    try:
        half_size = int(size) // 2
    except (TypeError, ValueError):
        return tangelo.HTTPStatusCode(
            400, "invalid service call - 'size' must be an integer")

    # parse the sender address and the recipient address (logged only)
    sender_list = parseParamEmailSender(**param_args)
    cherrypy.log("\tsender_list: %s)" % str(sender_list))

    recipient_list = parseParamEmailRecipient(**param_args)
    cherrypy.log("\trecipient_list: %s)" % str(recipient_list))

    document_uid = parseParamDocumentUID(**param_args)
    cherrypy.log("\tdocument_uid: %s)" % str(document_uid))

    document_datetime = parseParamDocumentDatetime(**param_args)
    cherrypy.log("\tdocument_datetime: %s)" % str(document_datetime))
    if not document_datetime:
        return tangelo.HTTPStatusCode(
            400,
            "invalid service call - missing mandatory param 'document_datetime'"
        )

    sender_address, recipient_address = parseParamAllSenderAllRecipient(
        **param_args)

    return es_get_conversation(data_set_id, sender_address, recipient_address,
                               start_datetime, end_datetime, half_size,
                               document_uid, document_datetime)
Exemple #9
0
def getEmail(*args):
    """Return one email together with its entities.

    The first positional argument is the URL-quoted email id. A missing id
    yields a ``tangelo.HTTPStatusCode`` 400.
    """
    email_key = urllib.unquote(nth(args, 0, ''))
    if email_key:
        tangelo.content_type("application/json")
        response = {}
        response["email"] = queryEmail(email_key)
        response["entities"] = queryEntity(email_key)
        return response

    return tangelo.HTTPStatusCode(400, "invalid service call - missing id")
Exemple #10
0
def getAllAttachmentBySender(*args, **kwargs):
    """Return the attachments sent by one sender.

    The sender is the first positional argument; the data set and date
    bounds come from the request parameters. A missing ``data_set_id`` or
    sender yields a ``tangelo.HTTPStatusCode`` 400.
    """
    call_summary = (str(args), str(kwargs))
    tangelo.log("getAttachmentsSender(args: %s kwargs: %s)" % call_summary)

    data_set_id, date_from, date_to, size = parseParamDatetime(**kwargs)
    sender = nth(args, 0, '')

    # Checked in order; the first missing value decides the error message.
    required = (
        (data_set_id, "invalid service call - missing data_set_id"),
        (sender, "invalid service call - missing sender"),
    )
    for value, message in required:
        if not value:
            return tangelo.HTTPStatusCode(400, message)

    tangelo.content_type("application/json")

    return get_attachments_by_sender(data_set_id, sender, date_from, date_to,
                                     size)
Exemple #11
0
def email_scores(*args):
    """Return an empty score list for an email.

    Positional arguments: the URL-quoted email id, then an optional category
    (default ``'all'``). A missing email id yields a
    ``tangelo.HTTPStatusCode`` 400.
    """
    requested_email = unquote(nth(args, 0, ''))
    requested_category = nth(args, 1, 'all')

    if requested_email:
        return dict(scores=[],
                    email=requested_email,
                    category=requested_category)

    return tangelo.HTTPStatusCode(400, "invalid service call - missing email")
Exemple #12
0
def download(data):
    """Start a background mailbox download for a user and return at once.

    ``data`` is a mapping read with ``.get``: ``"user"`` (required),
    ``"pass"`` and ``"limit"`` (default ``"2000"``, converted with ``int``
    inside the worker). Progress and errors are written to
    ``<work_dir>/<user>.log`` via ``spit`` and to the cherrypy log.

    Returns ``{"id": user}`` once the worker thread has been started, or a
    ``tangelo.HTTPStatusCode`` 400 when no user is supplied.
    """
    # Local import: only needed for the worker's last-resort error report.
    import traceback

    user = data.get("user")

    if not user:
        return tangelo.HTTPStatusCode(400, "invalid service call missing user")

    passwd = data.get("pass")
    limit = data.get("limit", "2000")
    logfile = "{}/{}.log".format(work_dir, user)
    spit(logfile, "[Start] {}\n".format(user), True)
    cherrypy.log("logfile: {}".format(logfile))

    def download_thread():
        # Stays None until login succeeds, so the cleanup below can tell
        # whether there is a session to close. Previously a failed login
        # left the name unbound and the finally clause raised on it.
        session = None
        try:
            cherrypy.log("Thread Start User: {}".format(user))

            try:
                session = newman_email.login(user, passwd, logfile)
                fldr = "{}/emails/{}".format(webroot, user)
                cherrypy.log("Login User: {}".format(user))

                # Start from an empty per-user folder.
                if os.path.exists(fldr):
                    rmrf(fldr)

                mkdir(fldr)

                spit("{}/output.csv".format(fldr),
                     newman_email.headerrow() + "\n")

                mkdir(fldr + "/emails")

                newman_email.download(session, user, fldr, int(limit), logfile)

                spit(logfile, "[Completed Download] {}\n".format(user))
            except Exception as ex:
                spit(logfile, "[Error] {}\n".format(ex))
                cherrypy.log("[Error] {}\n".format(ex))
            finally:
                if session is not None:
                    newman_email.close_session(session)

        except BaseException:
            # Last-resort handler for the worker thread: covers failures in
            # the handlers/cleanup above and non-Exception errors. The text
            # comes from traceback.format_exc() because sys.exc_info()[0] is
            # an exception class, which has no .replace().
            error_info = traceback.format_exc()
            cherrypy.log(error_info)
            spit(logfile, "[Error] {}\n".format(error_info.replace('\n', ' ')))

    thr = threading.Thread(target=download_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {"id": user}
def delete(resource, projname, datatype=None, dataset=None):
    """Delete a whole project, or a single dataset within it.

    ``resource`` must be ``"project"``. When both ``datatype`` and
    ``dataset`` are given, only that dataset is deleted; when neither is
    given, the project named ``projname`` is deleted.

    Returns "OK" on success, or a ``tangelo.HTTPStatusCode`` 400 for an
    unsupported resource type or when only one of ``datatype``/``dataset``
    is supplied.
    """
    if resource != "project":
        # The '%s' placeholder was never filled in before, so the message
        # showed a literal "%s" instead of the offending resource type.
        return tangelo.HTTPStatusCode(
            400,
            "Bad resource type '%s' - allowed types are: project" % resource)

    # (This is expressing xor)
    if (datatype is None) != (dataset is None):
        return tangelo.HTTPStatusCode(
            400,
            "Bad arguments - 'datatype' and 'dataset' must both be specified if either one is specified"
        )

    if datatype is None:
        api.deleteProjectNamed(projname)
    else:
        api.deleteDataset(projname, datatype, dataset)

    return "OK"
Exemple #14
0
def setSelectedDataSet(*args):
    """Select a data set and return its index record.

    The first positional argument is the URL-quoted data set id. The email
    address cache is initialised for it and the id is recorded in the
    module-level ``_current_data_set_selected``.

    Returns the result of ``_index_record``, or a ``tangelo.HTTPStatusCode``
    400 when the id is missing.
    """
    # Without this declaration the assignment below only created a local
    # that was discarded on return, so the selection was never recorded.
    # NOTE(review): assumes a module-level variable of this name is what
    # other code reads - confirm.
    global _current_data_set_selected

    tangelo.content_type("application/json")
    data_set_id = urllib.unquote(nth(args, 0, ''))
    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")

    # The return value was bound to an unused local before; only the call
    # itself matters here.
    initialize_email_addr_cache(data_set_id)
    _current_data_set_selected = data_set_id
    return _index_record(data_set_id)
Exemple #15
0
def getAttachCount(*args, **kwargs):
    """Return weekly attachment activity, per account or for the data set.

    The first positional argument (attach type) must be present but is
    currently forced to ``'all'``. When the request names no email
    addresses, one entry covering the whole data set is returned; otherwise
    one entry per address. A missing attach type yields a
    ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.content_type("application/json")
    call_summary = (str(args), str(kwargs))
    tangelo.log("getAttachCount(args: %s kwargs: %s)" % call_summary)

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing attach_type")

    attach_type = 'all'  #hack for now
    email_address_list = parseParamEmailAddress(**kwargs)

    def activity_entry(account_id, activities):
        # One element of "account_activity_list".
        return {
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activities
        }

    if email_address_list:
        entries = [
            activity_entry(
                address,
                get_emailer_attachment_activity(data_set_id,
                                                address,
                                                (start_datetime, end_datetime),
                                                interval="week"))
            for address in email_address_list
        ]
    else:
        total_activity = get_total_attachment_activity(
            data_set_id,
            data_set_id,
            query_function=attachment_histogram,
            sender_email_addr="",
            start=start_datetime,
            end=end_datetime,
            interval="week")
        entries = [activity_entry(data_set_id, total_activity)]

    return {"account_activity_list": entries}
Exemple #16
0
def getRollup(*args):
    """Look up the rollup id for an entity.

    The first positional argument is the URL-quoted entity id. Returns
    ``{"rollupId": <first row fetched>}``, or a ``tangelo.HTTPStatusCode``
    400 when the id is missing.
    """
    entity_key = urllib.unquote(nth(args, 0, ''))
    if entity_key:
        with newman_connector() as read_cnx, \
                execute_query(read_cnx.conn(), stmt_entity_rollup_id,
                              entity_key) as qry:
            first_row = qry.cursor().fetchone()
            tangelo.content_type("application/json")
            return {"rollupId": first_row}

    return tangelo.HTTPStatusCode(400, "invalid service call - missing id")
Exemple #17
0
def post(*pargs, **kwargs):
    """Submit a celery task named by the URL path and return its id.

    The non-empty positional arguments are joined with '.' to form the task
    module, and ``<module>.run`` is sent with the raw request body as its
    only argument.

    Returns ``{'id': <task id>}``, or a ``tangelo.HTTPStatusCode`` 400 when
    no non-empty path segment is given.
    """
    # Renamed from "input" so the builtin is not shadowed.
    request_body = cherrypy.request.body.read()

    # Drop empty segments *before* validating: previously a path made only
    # of empty segments passed the check and dispatched the task ".run".
    segments = [segment for segment in pargs if segment]
    if not segments:
        return tangelo.HTTPStatusCode(400, "No task module specified")

    task_module = '.'.join(segments)
    async_result = celery.send_task('%s.run' % task_module, [request_body])

    return {'id': async_result.task_id}
Exemple #18
0
def get_attachment_by_id(*args, **kwargs):
    """Fetch one attachment document and return its decoded contents.

    The attachment id is the first positional argument, falling back to
    ``parseParamAttachmentGUID``. The response content type is guessed from
    the file name; when it cannot be guessed the file is offered as a
    download instead of inline.

    Returns the decoded contents as a string, or a
    ``tangelo.HTTPStatusCode`` 400 when the index or attachment id is
    missing or the lookup returns nothing.
    """
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    attachment_id = nth(args, 0, '') or parseParamAttachmentGUID(**kwargs)

    cherrypy.log("email.get_attachments_sender(index=%s, attachment_id=%s)" % (data_set_id, attachment_id))
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing index")
    if not attachment_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attachment_id")

    hit = es().get(index=data_set_id, doc_type="attachments", id=attachment_id)
    if not hit:
        return tangelo.HTTPStatusCode(400, "no attachments found for (index=%s, attachment_id=%s)" % (data_set_id, attachment_id))

    source = hit["_source"]
    # Value unused; the lookup is kept so a document lacking this key fails
    # exactly as it did before.
    source["extension"]
    filename = source["filename"]

    guessed_type = mimetypes.guess_type(filename)[0]
    if guessed_type:
        tangelo.content_type(guessed_type)
        disposition = 'inline; filename="{}"'
    else:
        tangelo.content_type("application/x-download")
        disposition = 'attachment; filename="{}"'
    header("Content-Disposition", disposition.format(filename))

    payload = str(base64.b64decode(source["contents64"]))
    # dump(payload, filename)
    tangelo.log(str(len(payload)), "Uploading Attachment - length = ")

    return payload
Exemple #19
0
def getAccountActivity(*args, **kwargs):
    """Return weekly email activity, per account or for the whole data set.

    The first positional argument (account type) must be present. When the
    request names no email addresses, one entry covering the data set is
    returned; otherwise one entry per address. A missing account type
    yields a ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.content_type("application/json")
    call_summary = (str(args), str(kwargs))
    tangelo.log("getAccountActivity(args: %s kwargs: %s)" % call_summary)
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    account_type = urllib.unquote(nth(args, 0, ''))
    if not account_type:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing account_type")

    email_address_list = parseParamEmailAddress(**kwargs)

    def activity_entry(account_id, activities):
        # One element of "account_activity_list".
        return {
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activities
        }

    if email_address_list:
        entries = [
            activity_entry(
                address,
                get_email_activity(data_set_id,
                                   data_set_id,
                                   address,
                                   date_bounds=(start_datetime, end_datetime),
                                   interval="week"))
            for address in email_address_list
        ]
    else:
        overall = get_email_activity(data_set_id,
                                     data_set_id,
                                     date_bounds=(start_datetime, end_datetime),
                                     interval="week")
        entries = [activity_entry(data_set_id, overall)]

    return {"account_activity_list": entries}
Exemple #20
0
def getTopRollup(*args):
    """Return the top rollup entities, limited to a requested row count.

    The first positional argument is the URL-quoted row count. It is
    appended to ``stmt_top_rollup_entities`` as a ``limit`` clause.

    Returns ``{"entities": [rows...]}``, or a ``tangelo.HTTPStatusCode`` 400
    when the count is missing or is not a non-negative integer.
    """
    amt = urllib.unquote(nth(args, 0, ''))
    if not amt:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    # The value comes straight from the URL and is spliced into the SQL
    # text, so it must be forced to an integer first; otherwise arbitrary
    # SQL could be appended after "limit".
    try:
        row_limit = int(amt)
    except ValueError:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - id must be an integer")
    if row_limit < 0:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - id must be an integer")

    stmt = stmt_top_rollup_entities + ("limit {0}".format(row_limit))
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            rtn = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"entities": rtn}
Exemple #21
0
def setStarred(*args, **kwargs):
    """Set or clear the starred flag on one email.

    The email id is the last positional argument; the data set and the
    starred value come from the request parameters.

    Returns the result of ``set_starred``, or a ``tangelo.HTTPStatusCode``
    400 when no email id is supplied.
    """
    tangelo.log("setStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    # args[-1] raised IndexError when no positional argument was given, so
    # the 400 below was never reached for that case.
    email_id = args[-1] if args else ''
    if not email_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing email_id")

    starred = parseParamStarred(**kwargs)

    return set_starred(data_set_id, [email_id], starred)
Exemple #22
0
def getEmail(*args, **kwargs):
    """Return one email from a data set.

    The email id is the last positional argument; the data set and the text
    query come from the request parameters.

    Returns the result of ``get_email``, or a ``tangelo.HTTPStatusCode`` 400
    when no email id is supplied.
    """
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    qs = parseParamTextQuery(**kwargs)

    # args[-1] raised IndexError when no positional argument was given, so
    # the 400 below was never reached for that case.
    email_id = args[-1] if args else ''
    if not email_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing email_id")

    return get_email(data_set_id, email_id, qs)
Exemple #23
0
def get(job_id, operation, **kwargs):
    """Report the status or the result of a celery job.

    ``operation`` is ``'status'`` or ``'result'``:

    * ``'status'`` returns ``{'status': <state>}``, plus ``'message'`` when
      the job failed or ``'meta'`` when it is in the ``'PROGRESS'`` state
      (both are ``str(job.result)``).
    * ``'result'`` returns ``{'result': job.result}``.

    Any other operation yields a ``tangelo.HTTPStatusCode`` 400.
    """
    job = AsyncResult(job_id, backend=celery.backend)
    if operation == 'status':
        # Read the state once. It was previously read up to three times, and
        # if the job advanced between reads the reported status could
        # disagree with the branch taken below.
        # NOTE(review): assumes each access to job.state may re-query the
        # result backend - confirm for the celery version in use.
        state = job.state
        response = {'status': state}
        if state == states.FAILURE:
            response['message'] = str(job.result)
        elif state == 'PROGRESS':
            response['meta'] = str(job.result)

        return response

    if operation == 'result':
        return {'result': job.result}

    return tangelo.HTTPStatusCode(400, "Invalid request")
Exemple #24
0
def email_scores(*args):
    """Return the topic scores stored for an email in one category.

    Positional arguments: the URL-quoted email id, then an optional category
    (default ``'all'``). Scores are read from ``xref_email_topic_score``
    ordered by ``idx``. A missing email id yields a
    ``tangelo.HTTPStatusCode`` 400.
    """
    email_id = unquote(nth(args, 0, ''))
    category = nth(args, 1, 'all')

    if email_id:
        score_query = (" select score from xref_email_topic_score "
                       " where category_id = %s and email_id = %s "
                       " order by idx ")

        with newman_connector() as read_cnx, \
                execute_query(read_cnx.conn(), score_query, category,
                              email_id) as qry:
            scores = [head(row) for row in qry.cursor()]
            tangelo.content_type("application/json")
            return {"scores": scores, "email": email_id, "category": category}

    return tangelo.HTTPStatusCode(400, "invalid service call - missing email")
Exemple #25
0
def getAttachmentsSender(*args):
    """Return the emails with attachments sent by one address.

    The first positional argument is the URL-quoted sender address. Each
    result row is a list of strings: text values are UTF-8 encoded and all
    other values are converted with ``str``. A missing sender yields a
    ``tangelo.HTTPStatusCode`` 400.
    """
    sender = urllib.unquote(nth(args, 0, ''))
    if not sender:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    tangelo.content_type("application/json")

    def as_text(value):
        # Text is encoded as UTF-8; everything else goes through str().
        if isinstance(value, basestring):
            return value.encode('utf-8')
        return str(value)

    attachment_query = (
        " select id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize "
        " from email "
        " where from_addr = %s and attach != '' ")

    with newman_connector() as read_cnx, \
            execute_query(read_cnx.conn(), attachment_query, sender) as qry:
        rows = []
        for record in qry.cursor():
            rows.append([as_text(field) for field in record])
        return {"sender": sender, "email_attachments": rows}
Exemple #26
0
def execute_query(find, proj, rep, limit):
    """Run a query against the ``molecules`` collection and format the result.

    Makes up to ten attempts, retrying whenever pymongo raises
    ``AutoReconnect``. Returns the output of ``result_to_response`` for the
    first successful attempt, or a ``tangelo.HTTPStatusCode`` 500 when every
    attempt fails.
    """
    max_attempts = 10

    for _attempt in range(max_attempts):
        try:
            database = connect(config['server'], config['db'])

            tangelo.log(str(find))

            matches = database['molecules'].find(find, proj)
            matches.limit(limit)

            return result_to_response(rep, matches)
        except pymongo.errors.AutoReconnect:
            # Connection dropped; try again until attempts run out.
            continue

    return tangelo.HTTPStatusCode(500, 'Unable to connect to mongochem')
Exemple #27
0
def unknown(*args):
    """Answer any call routed here with a 400 "invalid service call"."""
    status, message = 400, "invalid service call"
    return tangelo.HTTPStatusCode(status, message)
Exemple #28
0
 def unknown(**kwargs):
     """Answer any call routed here with a 404 "unknown service call"."""
     status, message = 404, "unknown service call"
     return tangelo.HTTPStatusCode(status, message)
Exemple #29
0
    def invoke_service(self, module, *pargs, **kwargs):
        """Load (or reload) a service module and run it for this request.

        ``module`` is the path of the service's ``.py`` file. The module is
        (re)imported when it has not been loaded before or when it, or the
        ``.json`` configuration file next to it, is newer than the cached
        copy in ``self.modules``. The service is then run through its
        ``run()`` function if it has one, otherwise through the function
        named after the HTTP method, provided that function carries a truthy
        ``restful`` attribute.

        Result handling:

        * ``tangelo.HTTPStatusCode`` - raised as ``cherrypy.HTTPError``.
        * an object with a ``next`` attribute - registered with
          ``self.stream``, or a JSON error string when streaming is off.
        * a string - returned unchanged.
        * anything else - serialised with ``json.dumps``.

        Import errors and exceptions raised by the service are reported as
        "501 Error in Python Service"; a REST method that is missing or not
        marked ``restful`` gives 405.
        """
        # TODO(choudhury): This method should attempt to load the named module,
        # then invoke it with the given arguments.  However, if the named
        # module is "config" or something similar, the method should instead
        # launch a special "config" app, which lists the available app modules,
        # along with docstrings or similar.  It should also allow the user to
        # add/delete search paths for other modules.
        tangelo.content_type("text/plain")

        # Save the system path (be sure to *make a copy* using the list()
        # function) - it will be modified before invoking the service, and must
        # be restored afterwards.
        origpath = list(sys.path)

        # By default, the result should be an object with error message in if
        # something goes wrong; if nothing goes wrong this will be replaced
        # with some other object.
        result = {}

        # Store the modpath in the thread-local storage (tangelo.paths() makes
        # use of this per-thread data, so this is the way to get the data
        # across the "module boundary" properly).
        modpath = os.path.dirname(module)
        cherrypy.thread_data.modulepath = modpath
        cherrypy.thread_data.modulename = module

        # Extend the system path with the module's home path.
        sys.path.insert(0, modpath)

        # Import the module if not already imported previously (or if the
        # module to import, or its configuration file, has been updated since
        # the last import).
        try:
            stamp = self.modules.get(module)
            mtime = os.path.getmtime(module)

            # Swap the trailing "py" for "json" to get the config file path.
            config_file = module[:-2] + "json"
            config_mtime = None
            if os.path.exists(config_file):
                config_mtime = os.path.getmtime(config_file)

            if (stamp is None or mtime > stamp["mtime"] or
                (config_mtime is not None and config_mtime > stamp["mtime"])):
                if stamp is None:
                    tangelo.log("loading new module: " + module)
                else:
                    tangelo.log("reloading module: " + module)

                # Load any configuration the module might carry with it.
                if config_mtime is not None:
                    try:
                        with open(config_file) as f:
                            config = json.loads(json_minify(f.read()))
                            if type(config) != dict:
                                msg = ("Service module configuration file " +
                                       "does not contain a key-value store " +
                                       "(i.e., a JSON Object)")
                                tangelo.log(msg)
                                raise TypeError(msg)
                    except IOError:
                        tangelo.log("Could not open config file %s" %
                                    (config_file))
                        raise
                    except ValueError as e:
                        tangelo.log("Error reading config file %s: %s" %
                                    (config_file, e))
                        raise
                else:
                    config = {}

                cherrypy.config["module-config"][module] = config

                # Remove .py to get the module name
                name = module[:-3]

                # Load the module.
                service = imp.load_source(name, module)
                self.modules[module] = {
                    "module": service,
                    "mtime": max(mtime, config_mtime)
                }
            else:
                service = stamp["module"]
        except:
            # Deliberate catch-all: any failure while importing is turned
            # into an error response rather than propagated.
            bt = traceback.format_exc()

            tangelo.log("Error importing module %s" % (tangelo.request_path()),
                        "SERVICE")
            tangelo.log(bt, "SERVICE")

            result = tangelo.HTTPStatusCode(
                "501 Error in Python Service", Tangelo.literal +
                "There was an error while " + "trying to import module " +
                "%s:<br><pre>%s</pre>" % (tangelo.request_path(), bt))
        else:
            # Try to run the service - either it's in a function called
            # "run()", or else it's in a REST API consisting of at least one of
            # "get()", "put()", "post()", or "delete()".
            #
            # Collect the result in a variable - depending on its type, it will
            # be transformed in some way below (by default, to JSON, but may
            # also raise a cherrypy exception, log itself in a streaming table,
            # etc.).
            try:
                if 'run' in dir(service):
                    # Call the module's run() method, passing it the positional
                    # and keyword args that came into this method.
                    result = service.run(*pargs, **kwargs)
                else:
                    # Reaching here means it's a REST API.  Check for the
                    # requested method, ensure that it was marked as being part
                    # of the API, and call it; or give a 405 error.
                    method = cherrypy.request.method
                    # .get() rather than indexing: a verb the service does
                    # not implement used to raise KeyError, which the handler
                    # below reported as 501 instead of the intended 405.
                    restfunc = service.__dict__.get(method.lower())
                    if (restfunc is not None and hasattr(restfunc, "restful")
                            and restfunc.restful):
                        result = restfunc(*pargs, **kwargs)
                    else:
                        result = tangelo.HTTPStatusCode(
                            405, "Method not allowed")
            except Exception:
                bt = traceback.format_exc()

                tangelo.log(
                    "Caught exception while executing service %s" %
                    (tangelo.request_path()), "SERVICE")
                tangelo.log(bt, "SERVICE")

                result = tangelo.HTTPStatusCode(
                    "501 Error in Python Service", Tangelo.literal +
                    "There was an error " + "executing service " +
                    "%s:<br><pre>%s</pre>" % (tangelo.request_path(), bt))

        # Restore the path to what it was originally.
        sys.path = origpath

        # Check the type of the result to decide what result to finally return:
        #
        # 1. If it is an HTTPStatusCode object, raise a cherrypy HTTPError
        # exception, which will cause the browser to do the right thing.
        #
        # 2. TODO: If it's a Python generator object, log it with the Tangelo
        # streaming API.
        #
        # 3. If it's a Python dictionary, convert it to JSON.
        #
        # 4. If it's a string, don't do anything to it.
        #
        # This allows the services to return a Python object if they wish, or
        # to perform custom serialization (such as for MongoDB results, etc.).
        if isinstance(result, tangelo.HTTPStatusCode):
            if result.msg:
                raise cherrypy.HTTPError(result.code, result.msg)
            else:
                raise cherrypy.HTTPError(result.code)
        elif "next" in dir(result):
            if self.stream:
                return self.stream.add(result)
            else:
                return json.dumps({
                    "error":
                    "Streaming is not supported " +
                    "in this instance of Tangelo"
                })
        elif not isinstance(result, types.StringTypes):
            try:
                result = json.dumps(result)
            except TypeError as e:
                msg = Tangelo.literal + "<p>A JSON type error occurred in service " + tangelo.request_path(
                ) + ":</p>"
                msg += "<p><pre>" + cgi.escape(e.message) + "</pre></p>"

                raise cherrypy.HTTPError("501 Error in Python Service", msg)

        return result
def export_emails_archive(data_set_id, email_ids=None):
    """Export emails and their attachments as a gzip-compressed tar archive.

    For every requested email the archive holds a directory named after the
    email id, containing ``<id>.json`` (the email document) and one file per
    attachment.

    ``email_ids`` is an iterable of email document ids; when omitted, a
    single hard-coded id is used.

    Returns the archive bytes as a string, or a ``tangelo.HTTPStatusCode``
    400 when ``data_set_id`` is missing.
    """
    if email_ids is None:
        # Same fallback id as before, but built per call so the default is
        # no longer a mutable list shared between calls.
        email_ids = ["f9c9c59a-7fe8-11e5-bb05-08002705cb99"]

    cherrypy.log("email.get_attachments_sender(index=%s, attachment_id=%s)" %
                 (data_set_id, email_ids))
    if not data_set_id:
        return tangelo.HTTPStatusCode(400,
                                      "invalid service call - missing index")

    # A longer request timeout is used because reads timed out at the
    # client's 10s default (ConnectionTimeout / ReadTimeoutError).
    es = Elasticsearch([{
        "host": "10.1.70.143",
        "port": 9200
    }],
                       request_timeout=60)
    # TODO can implement with multiple doc_types and combine attachments in
    emails = es.mget(index=data_set_id,
                     doc_type="emails",
                     body={"docs": [{
                         "_id": email_id
                     } for email_id in email_ids]})

    # TODO filename
    filename = "export.tar.gz"
    tangelo.content_type("application/x-gzip")
    header("Content-Disposition", 'attachment; filename="{}"'.format(filename))

    string_buffer = cStringIO.StringIO()
    tar = tarfile.open(mode='w:gz', fileobj=string_buffer)

    # try/finally so the tar (and its gzip stream) is closed even when an
    # entry fails; close() must also run before the buffer is read.
    try:
        # Add each email to the tar
        for email_source in emails["docs"]:
            # NOTE(review): mget is expected to return an entry for every
            # requested id, with no "_source" for ids it could not find -
            # confirm. Such entries are skipped instead of raising KeyError.
            email = email_source.get("_source")
            if not email:
                continue

            # 0o755 / 0o644 are the same modes as before; this spelling is
            # accepted by Python 2.6+ as well as Python 3.
            tarinfo_parent = tarfile.TarInfo(name=email["id"])
            tarinfo_parent.type = tarfile.DIRTYPE
            tarinfo_parent.mode = 0o755
            tarinfo_parent.mtime = time.time()
            tar.addfile(tarinfo_parent)

            tarinfo = tarfile.TarInfo(email["id"] + "/" + email["id"] + ".json")
            # TODO -- email transformation
            data_string = json.dumps(email)

            tarinfo.size = len(data_string)
            tarinfo.mode = 0o644
            tarinfo.mtime = time.time()
            tar.addfile(tarinfo, cStringIO.StringIO(data_string))

            # Get the attachments; an email without the key has none.
            attachment_refs = email.get("attachments")
            if not attachment_refs:
                continue

            attachments = es.mget(index=data_set_id,
                                  doc_type="attachments",
                                  body={
                                      "docs": [{
                                          "_id": attch["guid"]
                                      } for attch in attachment_refs]
                                  })
            for attachment_source in attachments["docs"]:
                attachment = attachment_source.get("_source")
                if not attachment:
                    continue

                attch_data = str(base64.b64decode(attachment["contents64"]))

                tarinfo_attch = tarfile.TarInfo(email["id"] + "/" +
                                                attachment["filename"])
                tarinfo_attch.size = len(attch_data)
                tarinfo_attch.mode = 0o644
                tarinfo_attch.mtime = time.time()
                tar.addfile(tarinfo_attch, cStringIO.StringIO(attch_data))
    finally:
        tar.close()

    return string_buffer.getvalue()