Ejemplo n.º 1
0
def topic_list(*args, **kwargs):
    """Return ``get_categories`` for the data set named in the request.

    The first path argument is read as a category (defaulting to 'all') but
    is not consulted afterwards; the date bounds and size parsed from the
    request parameters are likewise unused here.
    """
    _category = nth(args, 0, 'all')  # looked up, not used below
    data_set_id, _start, _end, _size = parseParamDatetime(**kwargs)
    tangelo.content_type("application/json")
    return get_categories(data_set_id)
Ejemplo n.º 2
0
def get_graph_for_entity(*args, **kwargs):
    """Build the email graph, with attachments, for an entity search.

    The data set, date bounds, email addresses, entity filter and text query
    are parsed from the request parameters. Two queries are built from the
    same filters: one for the emails and one restricted to attachments. The
    attachment results are stored on the returned graph under
    ``"attachments"``.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, begin, end, size = parseParamDatetime(**kwargs)
    addresses = parseParamEmailAddress(**kwargs)
    entity = parseParamEntity(**kwargs)
    # TODO set from UI -- any size that is not above 500 is replaced by 2500
    if not size > 500:
        size = 2500

    text_query = parseParamTextQuery(**kwargs)

    # Filters shared by the email query and the attachment query
    filters = dict(email_addrs=addresses,
                   qs=text_query,
                   entity=entity,
                   date_bounds=(begin, end))

    query = _build_email_query(**filters)
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (query))

    hits = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Get attachments for community
    query = _build_email_query(attachments_only=True, **filters)
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)

    return graph
Ejemplo n.º 3
0
def searchStarred(*args, **kwargs):
    """Return the graph of starred emails, including their attachments.

    Only the data set, date bounds and size come from the request; the text
    query and address list are fixed to empty values. The attachment results
    are stored on the returned graph under ``"attachments"``.
    """
    tangelo.log("email.searchStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, begin, end, size = parseParamDatetime(**kwargs)

    # Any size that is not above 500 is replaced by 2500
    if not size > 500:
        size = 2500

    # TODO set from UI
    filters = dict(email_addrs=[], qs='', date_bounds=(begin, end), starred=True)

    query = _build_email_query(**filters)
    tangelo.log("email.searchStarred(query: %s)" % (query))

    hits = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Get attachments for community
    query = _build_email_query(attachments_only=True, **filters)
    tangelo.log("email.searchStarred(attachment-query: %s)" % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)

    return graph
Ejemplo n.º 4
0
def listAllDataSet():
    """Describe the available data sets and the top-ranked email addresses.

    Returns a dict holding the default data set id, one record per selected
    index, and the ranked address rows keyed by their first element
    (presumably the address itself -- confirm against the ranking helper).
    """
    tangelo.log("datasource.listAllDataSet()")

    # Ignore index keys in ES that are not in the newman_app.conf
    # Find all the indexes that begin with the index loader prefix
    records = []
    for name in index_list():
        if name in data_set_names() or name.startswith(index_creator_prefix()):
            records.append(_index_record(name))

    # No request parameters are passed, so the parser is called without any
    data_set_id, begin, end, size = parseParamDatetime()

    ranking = get_ranked_email_address_from_email_addrs_index(
        data_set_id, begin, end, size)
    rows_by_key = {}
    for row in ranking["emails"]:
        rows_by_key[row[0]] = row

    return {
        "data_set_selected": getDefaultDataSetID(),
        "data_sets": records,
        "top_hits": {
            "order_by": "rank",
            "email_addrs": rows_by_key
        }
    }
Ejemplo n.º 5
0
def searchStarred(*args, **kwargs):
    """Return the graph of starred emails, including their attachments.

    Only the data set, date bounds and size come from the request; the text
    query and address list are fixed to empty values. The attachment results
    are stored on the returned graph under ``"attachments"``.
    """
    tangelo.log("email.searchStarred(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, begin, end, size = parseParamDatetime(**kwargs)

    # Any size that is not above 500 is replaced by 2500
    if not size > 500:
        size = 2500

    # TODO set from UI
    filters = dict(email_addrs=[],
                   qs='',
                   date_bounds=(begin, end),
                   starred=True)

    query = _build_email_query(**filters)
    tangelo.log("email.searchStarred(query: %s)" % (query))

    hits = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Get attachments for community
    query = _build_email_query(attachments_only=True, **filters)
    tangelo.log("email.searchStarred(attachment-query: %s)" % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)

    return graph
Ejemplo n.º 6
0
def getRankedAddresses(*args, **kwargs):
    """Return one summary entry per ranked email address.

    For every row of the ranked-address result a graph is fetched for the
    row's first element, and counts are read from fixed row positions
    (5, 6 and 7). The whole graph is also attached to each entry under
    ``"TEMPORARY_GRAPH"``.
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, begin, end, size = parseParamDatetime(**kwargs)
    # TODO - reminder no 'qs' here set to ''
    # qs = parseParamTextQuery(**kwargs)
    text_query = ''

    # TODO this needs to come from UI
    if not size > 500:
        size = 2500

    ranking = get_ranked_email_address_from_email_addrs_index(
        data_set_id, begin, end, size)

    entries = []
    for row in ranking["emails"]:
        graph = es_get_all_email_by_address(data_set_id, row[0], text_query,
                                            begin, end, size)
        entry = {
            "address_search_url_path": row[0],
            "parameters": kwargs,
            "search_results": {
                "mail_sent_count": row[6],
                "mail_received_count": row[5],
                "mail_attachment_count": row[7],
                "query_matched_count": graph["query_hits"],
                "associated_count": len(graph["graph"]["nodes"])
            },
            "TEMPORARY_GRAPH": graph
        }
        entries.append(entry)

    return {"top_address_list": entries}
Ejemplo n.º 7
0
def getAttachFileType(*args, **kwargs):
    """Return the top attachment file types for the requested data set.

    The requested size is used as the number of types to return. The
    attachment-type path argument and the email-address parameter are parsed
    but do not influence the result yet.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachFileType(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, begin, end, size = parseParamDatetime(**kwargs)

    limit = int(size)

    # hack for now: an empty path argument falls back to 'all'; the value is
    # not used further down.
    attach_type = urllib.unquote(nth(args, 0, '')) or 'all'

    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO: implement populating the attachment file-types under individual
    # email-accounts; until then the same data-set-wide result is returned
    # whether or not email addresses were supplied.
    top_types = get_top_attachment_types(
        data_set_id, date_bounds=(begin, end), num_top_attachments=limit)

    return {
        "account_id": data_set_id,
        "data_set_id": data_set_id,
        "account_start_datetime": begin,
        "account_end_datetime": end,
        "types": top_types[:limit]
    }
Ejemplo n.º 8
0
def search_email_by_conversation(*path_args, **param_args):
    """Return the conversation surrounding a given document.

    The data set and date bounds are parsed from the request parameters,
    along with sender/recipient addresses, the document UID and the document
    datetime. Half of the requested size is passed on to
    ``es_get_conversation``.

    Returns a ``tangelo.HTTPStatusCode`` 400 when ``document_datetime`` is
    missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search_email_by_conversation(path_args[%s] %s)" %
                (len(path_args), str(path_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)
    # TODO: set from UI
    # The raw request parameter may arrive as a string, so coerce it to an
    # int before it is halved below.
    size = int(param_args.get('size', 2500))

    # parse the sender address and the recipient address (logged only)
    sender_list = parseParamEmailSender(**param_args)
    cherrypy.log("\tsender_list: %s)" % str(sender_list))

    recipient_list = parseParamEmailRecipient(**param_args)
    cherrypy.log("\trecipient_list: %s)" % str(recipient_list))

    document_uid = parseParamDocumentUID(**param_args)
    cherrypy.log("\tdocument_uid: %s)" % str(document_uid))

    document_datetime = parseParamDocumentDatetime(**param_args)
    cherrypy.log("\tdocument_datetime: %s)" % str(document_datetime))
    if not document_datetime:
        return tangelo.HTTPStatusCode(
            400,
            "invalid service call - missing mandatory param 'document_datetime'"
        )

    sender_address, recipient_address = parseParamAllSenderAllRecipient(
        **param_args)

    # Floor division keeps the halved size an integer on any Python version.
    return es_get_conversation(data_set_id, sender_address, recipient_address,
                               start_datetime, end_datetime, size // 2,
                               document_uid, document_datetime)
Ejemplo n.º 9
0
def getRankedAddresses(*args, **kwargs):
    """Return one summary entry per ranked email address.

    For every row of the ranked-address result a graph is fetched for the
    row's first element, and counts are read from fixed row positions
    (5, 6 and 7). The whole graph is also attached to each entry under
    ``"TEMPORARY_GRAPH"``.
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, begin, end, size = parseParamDatetime(**kwargs)
    # TODO - reminder no 'qs' here set to ''
    # qs = parseParamTextQuery(**kwargs)
    text_query = ''

    # TODO this needs to come from UI
    if not size > 500:
        size = 2500

    ranking = get_ranked_email_address_from_email_addrs_index(data_set_id, begin, end, size)

    entries = []
    for row in ranking["emails"]:
        graph = es_get_all_email_by_address(data_set_id, row[0], text_query, begin, end, size)
        entry = {
            "address_search_url_path": row[0],
            "parameters": kwargs,
            "search_results": {
                "mail_sent_count": row[6],
                "mail_received_count": row[5],
                "mail_attachment_count": row[7],
                "query_matched_count": graph["query_hits"],
                "associated_count": len(graph["graph"]["nodes"])
            },
            "TEMPORARY_GRAPH": graph
        }
        entries.append(entry)

    return {"top_address_list": entries}
Ejemplo n.º 10
0
def search_email_by_community(*args, **param_args):
    """Return all emails for a community taken from the first path argument.

    The data set and date bounds are parsed from the request parameters; the
    size is read straight from the ``size`` parameter (default 2500).

    Returns a ``tangelo.HTTPStatusCode`` 400 when the data set id or the
    community is missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_community(args: %s kwargs: %s)" %
                (str(args), str(param_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)
    community = nth(args, 0, '')

    # TODO: set from UI
    size = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")
    if not community:
        # The missing value here is the community, not a sender.
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing community")

    email_addrs = parseParam_email_addr(**param_args)

    qs = parseParamTextQuery(**param_args)

    return es_get_all_email_by_community(data_set_id, community, email_addrs,
                                         qs, start_datetime, end_datetime,
                                         size)
Ejemplo n.º 11
0
def search_email_by_topic(*args, **param_args):
    """Return all emails matching a topic given in the request parameters.

    The size is read straight from the ``size`` parameter (default 2500).
    Returns a ``tangelo.HTTPStatusCode`` 400 when the data set id or the
    ``topic_index`` parameter is missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_topic(args: %s kwargs: %s)" %
                (str(args), str(param_args)))

    data_set_id, begin, end, _parsed_size = parseParamDatetime(**param_args)

    # TODO: set from UI
    limit = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")

    if not param_args.get("topic_index"):
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing topic_index")

    # Parse the remaining filters in the same order as they are passed on
    topic = parseParamTopic(**param_args)
    addresses = parseParam_email_addr(**param_args)
    text_query = parseParamTextQuery(**param_args)

    return es_get_all_email_by_topic(data_set_id,
                                     topic=topic,
                                     email_addrs=addresses,
                                     qs=text_query,
                                     start_datetime=begin,
                                     end_datetime=end,
                                     size=limit)
Ejemplo n.º 12
0
def get_graph_for_entity(*args, **kwargs):
    """Build the email graph, with attachments, for an entity search.

    The data set, date bounds, email addresses, entity filter and text query
    are parsed from the request parameters. Two queries are built from the
    same filters: one for the emails and one restricted to attachments. The
    attachment results are stored on the returned graph under
    ``"attachments"``.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" % (str(args), str(kwargs)))

    data_set_id, begin, end, size = parseParamDatetime(**kwargs)
    addresses = parseParamEmailAddress(**kwargs)
    entity = parseParamEntity(**kwargs)
    # TODO set from UI -- any size that is not above 500 is replaced by 2500
    if not size > 500:
        size = 2500

    text_query = parseParamTextQuery(**kwargs)

    # Filters shared by the email query and the attachment query
    filters = dict(email_addrs=addresses, qs=text_query, entity=entity, date_bounds=(begin, end))

    query = _build_email_query(**filters)
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (query))

    hits = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Get attachments for community
    query = _build_email_query(attachments_only=True, **filters)
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)

    return graph
Ejemplo n.º 13
0
def get_topics_by_query(*args, **kwargs):
    """Return up to 20 dynamically computed clusters under ``"topics"``.

    The clustering algorithm (default 'lingo') and the analysed field
    (default '_source.body') come from the request parameters; the text
    query is currently fixed to an empty string.
    """
    tangelo.content_type("application/json")
    algorithm = kwargs.get('algorithm', 'lingo')
    data_set_id, begin, end, _size = parseParamDatetime(**kwargs)
    addresses = parseParamEmailAddress(**kwargs)

    # TODO -------------------------------------------------------------------------
    # TODO  REMEMBER TO EVALUATE QUERY TERMS -- VERY IMPORTANT for good clustering!
    # TODO -------------------------------------------------------------------------
    text_query = ''
    # TODO set from UI
    field = kwargs.get("analysis_field", "_source.body")
    # TODO set from UI
    max_topics = 20

    topics = get_dynamic_clusters(data_set_id,
                                  "emails",
                                  email_addrs=addresses,
                                  query_terms=text_query,
                                  topic_score=None,
                                  entity={},
                                  date_bounds=(begin, end),
                                  cluster_fields=[field],
                                  cluster_title_fields=["_source.subject"],
                                  algorithm=algorithm,
                                  max_doc_pool_size=500)

    return {"topics": topics[:max_topics]}
Ejemplo n.º 14
0
def getRankedEmails(*args, **kwargs):
    """Return the ranked email addresses for the requested data set.

    The data set, date bounds and size parsed from the request parameters
    are passed unchanged to the ranking helper, whose result is returned.
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedEmails(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, begin, end, limit = parseParamDatetime(**kwargs)
    ranking = get_ranked_email_address_from_email_addrs_index(
        data_set_id, begin, end, limit)
    return ranking
Ejemplo n.º 15
0
def getCommunities(*args, **kwargs):
    """Return the top communities of the data set under ``"communities"``.

    The requested size is used both as the number of communities asked for
    and as the cap applied to the returned list.
    """
    tangelo.log("getCommunities(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, begin, end, size = parseParamDatetime(**kwargs)

    #top_count = int(urllib.unquote(nth(args, 0, "40")))
    limit = int(size)

    communities = get_top_communities(
        data_set_id, date_bounds=(begin, end), num_communities=limit)
    return {"communities": communities[:limit]}
Ejemplo n.º 16
0
def getAttachCount(*args, **kwargs):
    """Return weekly attachment activity under ``"account_activity_list"``.

    Without email addresses in the request, one entry is returned for the
    whole data set; otherwise one entry is returned per address. Returns a
    ``tangelo.HTTPStatusCode`` 400 when the attach_type path argument is
    missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachCount(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, begin, end, _size = parseParamDatetime(**kwargs)

    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing attach_type")

    # hack for now: the supplied type is only checked for presence and then
    # overridden; it is not used further down.
    attach_type = 'all'
    addresses = parseParamEmailAddress(**kwargs)

    entries = []
    if addresses:
        for account_id in addresses:
            entries.append({
                "account_id": account_id,
                "data_set_id": data_set_id,
                "account_start_datetime": begin,
                "account_end_datetime": end,
                "activities": get_emailer_attachment_activity(
                    data_set_id, account_id, (begin, end), interval="week")
            })
    else:
        activity = get_total_attachment_activity(
            data_set_id,
            data_set_id,
            query_function=attachment_histogram,
            sender_email_addr="",
            start=begin,
            end=end,
            interval="week")
        entries.append({
            "account_id": data_set_id,
            "data_set_id": data_set_id,
            "account_start_datetime": begin,
            "account_end_datetime": end,
            "activities": activity
        })

    return {"account_activity_list": entries}
Ejemplo n.º 17
0
def exif_emails(*args, **kwargs):
    """Return ``es_get_exif_emails`` for the requested data set and size.

    The text query is parsed from the request parameters but is not passed
    on to the lookup.
    """
    tangelo.log("geo.exif_emails(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, _begin, _end, limit = parseParamDatetime(**kwargs)

    # Parsed but currently unused by the lookup below
    _text_query = parseParamTextQuery(**kwargs)

    return es_get_exif_emails(data_set_id, limit)
Ejemplo n.º 18
0
def getAccountActivity(*args, **kwargs):
    """Return weekly email activity under ``"account_activity_list"``.

    Without email addresses in the request, one entry is returned for the
    whole data set; otherwise one entry is returned per address. Returns a
    ``tangelo.HTTPStatusCode`` 400 when the account_type path argument is
    missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAccountActivity(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, begin, end, _size = parseParamDatetime(**kwargs)

    # The account type is only checked for presence
    if not urllib.unquote(nth(args, 0, '')):
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing account_type")

    addresses = parseParamEmailAddress(**kwargs)

    entries = []
    if addresses:
        for account_id in addresses:
            entries.append({
                "account_id": account_id,
                "data_set_id": data_set_id,
                "account_start_datetime": begin,
                "account_end_datetime": end,
                "activities": get_email_activity(data_set_id,
                                                 data_set_id,
                                                 account_id,
                                                 date_bounds=(begin, end),
                                                 interval="week")
            })
    else:
        entries.append({
            "account_id": data_set_id,
            "data_set_id": data_set_id,
            "account_start_datetime": begin,
            "account_end_datetime": end,
            "activities": get_email_activity(data_set_id,
                                             data_set_id,
                                             date_bounds=(begin, end),
                                             interval="week")
        })

    return {"account_activity_list": entries}
Ejemplo n.º 19
0
def getAllAttachmentBySender(*args, **kwargs):
    """Return the attachments for the sender given as first path argument.

    Returns a ``tangelo.HTTPStatusCode`` 400 when the data set id or the
    sender is missing. The JSON content type is set only after validation.
    """
    tangelo.log("getAttachmentsSender(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, begin, end, limit = parseParamDatetime(**kwargs)
    sender = nth(args, 0, '')

    # Validate the data set first, then the sender
    for value, message in ((data_set_id, "invalid service call - missing data_set_id"),
                           (sender, "invalid service call - missing sender")):
        if not value:
            return tangelo.HTTPStatusCode(400, message)

    tangelo.content_type("application/json")

    return get_attachments_by_sender(data_set_id, sender, begin, end, limit)
Ejemplo n.º 20
0
def exportStarred(*args, **kwargs):
    """Export every starred email of the requested data set as an archive.

    The starred-email query uses an empty text query and address list; the
    ``"num"`` field of each hit is collected and passed to
    ``export_emails_archive``.
    """
    data_set_id, begin, end, limit = parseParamDatetime(**kwargs)

    # TODO set from UI
    query = _build_email_query(email_addrs=[], qs='', date_bounds=(begin, end), starred=True)
    tangelo.log("email.exportStarred(query: %s)" % (query))

    found = _query_emails(data_set_id, limit, query)
    email_ids = []
    for hit in found["hits"]:
        email_ids.append(hit["num"])
    return export_emails_archive(data_set_id, email_ids)
Ejemplo n.º 21
0
def setStarred(*args, **kwargs):
    """Set or clear the starred flag on the email named by the last path arg.

    The data set id and the starred value are parsed from the request
    parameters.

    Returns a ``tangelo.HTTPStatusCode`` 400 when no email id is supplied,
    including when there are no path arguments at all.
    """
    tangelo.log("setStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    # Indexing an empty args tuple would raise IndexError before the check
    # below could answer with a 400, so treat "no path args" as a missing id.
    email_id = args[-1] if args else None
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email_id")

    starred = parseParamStarred(**kwargs)

    return set_starred(data_set_id, [email_id], starred)
Ejemplo n.º 22
0
def getEmail(*args, **kwargs):
    """Return the email whose id is the last path argument.

    The data set id and the text query are parsed from the request
    parameters and passed to ``get_email`` together with the id.

    Returns a ``tangelo.HTTPStatusCode`` 400 when no email id is supplied,
    including when there are no path arguments at all.
    """
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    qs = parseParamTextQuery(**kwargs)

    # Indexing an empty args tuple would raise IndexError before the check
    # below could answer with a 400, so treat "no path args" as a missing id.
    email_id = args[-1] if args else None
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email_id")

    return get_email(data_set_id, email_id, qs)
Ejemplo n.º 23
0
def search(*path_args, **param_args):
    """Dispatch a search request on its first path segment.

    ``text`` / ``all`` with a second segment run a text query using that
    (URL-unquoted) segment; ``email`` with a second segment returns all email
    for that (URL-unquoted) address. Every other case -- no segments, a
    single segment, ``topic``, ``community`` or an unknown field -- returns
    an empty graph/rows result.
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search(path_args[%s] %s)" %
                (len(path_args), str(path_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)

    # TODO this needs to come from UI
    size = size if size > 500 else 2500

    # TODO make sure that the qs param is put on the query
    qs = parseParamTextQuery(**param_args)

    # A call without any path segment must not fail on path_args[0]; it
    # falls through to the empty result like any unrecognised field.
    field = path_args[0] if path_args else None

    # re-direct based on field
    if len(path_args) >= 2:
        if field == "text" or field == "all":
            # TODO remove hacky path_args - should come from params
            qs = urllib.unquote(nth(path_args, 1, ''))
            return get_top_email_by_text_query(data_set_id, qs, start_datetime,
                                               end_datetime, size)
        if field == "email":
            # TODO remove hacky path_args - should come from params
            email_address = urllib.unquote(nth(path_args, 1, ''))
            return es_get_all_email_by_address(data_set_id, email_address, qs,
                                               start_datetime, end_datetime,
                                               size)
        # TODO REMOVEV this call
        # elif path_args[0] == "entity":
        #     return get_graph_by_entity(*path_args, **param_args)
        # TODO implement search by topic
        # TODO implement search by community

    # TODO clean up this method
    return {"graph": {"nodes": [], "links": []}, "rows": []}
Ejemplo n.º 24
0
def getDomains(*args, **kwargs):
    """Return the top domains of the data set under ``"domains"``.

    The requested size is used both as the number of domains asked for and
    as the cap applied to the returned list.
    """
    tangelo.log("getDomains(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, begin, end, size = parseParamDatetime(**kwargs)

    #top_count = int(urllib.unquote(nth(args, 0, "40")))
    limit = int(size)

    domains = get_top_domains(data_set_id,
                              date_bounds=(begin, end),
                              num_domains=limit)
    return {"domains": domains[:limit]}
Ejemplo n.º 25
0
def getEmail(*args, **kwargs):
    """Return the email whose id is the last path argument.

    The data set id and the text query are parsed from the request
    parameters and passed to ``get_email`` together with the id.

    Returns a ``tangelo.HTTPStatusCode`` 400 when no email id is supplied,
    including when there are no path arguments at all.
    """
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    qs = parseParamTextQuery(**kwargs)

    # Indexing an empty args tuple would raise IndexError before the check
    # below could answer with a 400, so treat "no path args" as a missing id.
    email_id = args[-1] if args else None
    if not email_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing email_id")

    return get_email(data_set_id, email_id, qs)
Ejemplo n.º 26
0
def setStarred(*args, **kwargs):
    """Set or clear the starred flag on the email named by the last path arg.

    The data set id and the starred value are parsed from the request
    parameters.

    Returns a ``tangelo.HTTPStatusCode`` 400 when no email id is supplied,
    including when there are no path arguments at all.
    """
    tangelo.log("setStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    # Indexing an empty args tuple would raise IndexError before the check
    # below could answer with a 400, so treat "no path args" as a missing id.
    email_id = args[-1] if args else None
    if not email_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing email_id")

    starred = parseParamStarred(**kwargs)

    return set_starred(data_set_id, [email_id], starred)
Ejemplo n.º 27
0
def exportStarred(*args, **kwargs):
    """Export every starred email of the requested data set as an archive.

    The starred-email query uses an empty text query and address list; the
    ``"num"`` field of each hit is collected and passed to
    ``export_emails_archive``.
    """
    data_set_id, begin, end, limit = parseParamDatetime(**kwargs)

    # TODO set from UI
    query = _build_email_query(email_addrs=[],
                               qs='',
                               date_bounds=(begin, end),
                               starred=True)
    tangelo.log("email.exportStarred(query: %s)" % (query))

    found = _query_emails(data_set_id, limit, query)
    email_ids = []
    for hit in found["hits"]:
        email_ids.append(hit["num"])
    return export_emails_archive(data_set_id, email_ids)
Ejemplo n.º 28
0
def getAllAttachmentBySender(*args, **kwargs):
    """Return the attachments for the sender given as first path argument.

    Returns a ``tangelo.HTTPStatusCode`` 400 when the data set id or the
    sender is missing. The JSON content type is set only after validation.
    """
    tangelo.log("getAttachmentsSender(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, begin, end, limit = parseParamDatetime(**kwargs)
    sender = nth(args, 0, '')

    # Validate the data set first, then the sender
    checks = (
        (data_set_id, "invalid service call - missing data_set_id"),
        (sender, "invalid service call - missing sender"),
    )
    for value, message in checks:
        if not value:
            return tangelo.HTTPStatusCode(400, message)

    tangelo.content_type("application/json")

    return get_attachments_by_sender(data_set_id, sender, begin, end, limit)
Ejemplo n.º 29
0
def search_email_by_address_set(*path_args, **param_args):
    """Return all emails between the given sender and recipient address sets.

    The data set and date bounds are parsed from the request parameters; the
    size is read straight from the ``size`` parameter (default 2500).
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search_email_by_address_set(path_args[%s] %s)" %
                (len(path_args), str(path_args)))

    data_set_id, begin, end, _parsed_size = parseParamDatetime(**param_args)
    # TODO: set from UI
    limit = param_args.get('size', 2500)

    # parse the sender address and the recipient address
    addresses = parseParamAllSenderAllRecipient(**param_args)
    senders, recipients = addresses

    return es_get_all_email_by_address_set(data_set_id, senders, recipients,
                                           begin, end, limit)
Ejemplo n.º 30
0
def search_email_by_conversation_forward_backward(*path_args, **param_args):
    """Return conversation emails before or after a point, per ``order``.

    The ``order`` request parameter (default 'next') is mapped to a sort
    direction: 'prev' becomes 'desc', anything else becomes 'asc'. The size
    is read straight from the ``size`` parameter (default 2500).
    """
    tangelo.content_type("application/json")
    # Log under this function's own name so its calls can be told apart from
    # search_email_by_address_set in the service log.
    tangelo.log(
        "search.search_email_by_conversation_forward_backward(path_args[%s] %s)"
        % (len(path_args), str(path_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)
    # TODO: set from UI
    size = param_args.get('size', 2500)

    order = param_args.get('order', 'next')
    order = 'desc' if order == 'prev' else 'asc'

    # parse the sender address and the recipient address
    sender_address_list, recipient_address_list = parseParamAllSenderAllRecipient(
        **param_args)

    return es_get_all_email_by_conversation_forward_backward(
        data_set_id, sender_address_list, recipient_address_list,
        start_datetime, end_datetime, size, order)
Ejemplo n.º 31
0
def get_attachment_by_id(*args, **kwargs):
    """Return the decoded contents of one attachment document.

    The attachment id is taken from the first path argument, falling back to
    the attachment GUID request parameter. The response content type is
    guessed from the file name; when no type can be guessed the attachment
    is offered as a download instead of inline.

    Returns a ``tangelo.HTTPStatusCode`` 400 when the data set id or the
    attachment id is missing, or when the lookup yields a falsy result.
    """
    data_set_id, _begin, _end, _size = parseParamDatetime(**kwargs)

    attachment_id = nth(args, 0, '') or parseParamAttachmentGUID(**kwargs)

    cherrypy.log("email.get_attachments_sender(index=%s, attachment_id=%s)" % (data_set_id, attachment_id))
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing index")
    if not attachment_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attachment_id")

    document = es().get(index=data_set_id, doc_type="attachments", id=attachment_id)
    if not document:
        return tangelo.HTTPStatusCode(400, "no attachments found for (index=%s, attachment_id=%s)" % (data_set_id, attachment_id))

    source = document["_source"]
    ext = source["extension"]  # looked up but not used below
    filename = source["filename"]

    guessed_type = mimetypes.guess_type(filename)[0]

    if guessed_type:
        tangelo.content_type(guessed_type)
        header("Content-Disposition", 'inline; filename="{}"'.format(filename))
    else:
        tangelo.content_type("application/x-download")
        header("Content-Disposition", 'attachment; filename="{}"'.format(filename))

    # The stored contents are base64 encoded
    decoded = base64.b64decode(source["contents64"])
    # dump(decoded, filename)

    payload = str(decoded)
    tangelo.log(str(len(payload)), "Uploading Attachment - length = ")

    return payload
0
def get_top_entities(*args, **kwargs):
    """Return the leading entries of the entity histogram as JSON-ready rows.

    The first positional argument (URL-unquoted, default ``"20"``) is the
    number of entities to keep.  When email addresses are supplied in the
    keyword arguments they are forwarded to ``get_entity_histogram`` as its
    third positional argument; otherwise that argument is omitted.

    Returns ``{"entities": [[index_as_str, type, key, doc_count], ...]}``.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    limit = int(urllib.unquote(nth(args, 0, "20")))

    data_set_id, start_datetime, end_datetime, _size = parseParamDatetime(
        **kwargs)
    addresses = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)
    bounds = (start_datetime, end_datetime)

    # TODO qs not being evaluated in inner filter called by this method
    if addresses:
        histogram = get_entity_histogram(data_set_id,
                                         "emails",
                                         addresses,
                                         qs=qs,
                                         date_bounds=bounds)
    else:
        histogram = get_entity_histogram(data_set_id,
                                         "emails",
                                         qs=qs,
                                         date_bounds=bounds)

    # Both cases share the same row layout, so the rows are built once.
    rows = []
    for position, entity in enumerate(histogram[:limit]):
        rows.append([
            str(position),
            entity["type"],
            entity["key"],
            entity["doc_count"],
        ])

    return {"entities": rows}
Ejemplo n.º 33
0
def getAttachCount(*args, **kwargs):
    """Return weekly attachment activity for the data set or for accounts.

    The first positional argument (URL-unquoted) must be non-empty, otherwise
    a ``tangelo.HTTPStatusCode(400, ...)`` is returned.  Its value is not used
    beyond that check (the original code overrides it with ``'all'`` as a
    stated hack).

    Without email addresses in the keyword arguments, a single entry for the
    whole data set is produced via ``get_total_attachment_activity``; with
    addresses, one entry per address is produced via
    ``get_emailer_attachment_activity``.

    Returns ``{"account_activity_list": [entry, ...]}``.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachCount(args: %s kwargs: %s)" % (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, _size = parseParamDatetime(**kwargs)

    if not urllib.unquote(nth(args, 0, '')):
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attach_type")

    # hack for now: every request is treated as attach_type 'all', so the
    # requested value is only checked for presence above.
    addresses = parseParamEmailAddress(**kwargs)

    def _entry(account_id, activities):
        # One element of "account_activity_list"; key order matches the
        # original literal.
        return {
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activities,
        }

    if not addresses:
        overall = get_total_attachment_activity(
            data_set_id,
            data_set_id,
            query_function=attachment_histogram,
            sender_email_addr="",
            start=start_datetime,
            end=end_datetime,
            interval="week")
        return {"account_activity_list": [_entry(data_set_id, overall)]}

    entries = []
    for address in addresses:
        per_account = get_emailer_attachment_activity(
            data_set_id, address, (start_datetime, end_datetime), interval="week")
        entries.append(_entry(address, per_account))

    return {"account_activity_list": entries}
Ejemplo n.º 34
0
def listAllDataSet():
    """Describe the available data sets and the ranked email addresses.

    Returns a dict with the default data-set id (``data_set_selected``), the
    index records (``data_sets``) and, under ``top_hits``, the ranked email
    address rows keyed by each row's first element.
    """
    tangelo.log("datasource.listAllDataSet()")

    # Ignore index keys in ES that are not in the newman_app.conf
    # Find all the indexes that begin with the index loader prefix
    records = []
    for name in index_list():
        if name in data_set_names() or name.startswith(index_creator_prefix()):
            records.append(_index_record(name))

    # No request parameters are supplied, so the helper is called with none
    # and whatever it returns in that case is used.
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime()

    ranked = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)
    by_address = dict((row[0], row) for row in ranked["emails"])

    top_hits = {
        "order_by": "rank",
        "email_addrs": by_address,
    }
    return {
        "data_set_selected": getDefaultDataSetID(),
        "data_sets": records,
        "top_hits": top_hits,
    }
Ejemplo n.º 35
0
def get_top_entities(*args, **kwargs):
    """Build the top-N entity listing for the requested data set.

    N comes from the first positional argument (URL-unquoted, ``"20"`` when
    absent).  Email addresses parsed from the keyword arguments, if any, are
    passed to ``get_entity_histogram`` as an extra positional argument.

    Returns ``{"entities": rows}`` where each row is
    ``[str(index), type, key, doc_count]``.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    top_count = int(urllib.unquote(nth(args, 0, "20")))

    data_set_id, start_datetime, end_datetime, _size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)

    # The address list is only passed along when it is non-empty.
    histogram_args = [data_set_id, "emails"]
    if email_address_list:
        histogram_args.append(email_address_list)

    # TODO qs not being evaluated in inner filter called by this method
    entities = get_entity_histogram(
        *histogram_args,
        qs=qs,
        date_bounds=(start_datetime, end_datetime)
    )[:top_count]

    return {
        "entities": [
            [str(rank), item["type"], item["key"], item["doc_count"]]
            for rank, item in enumerate(entities)
        ]
    }
Ejemplo n.º 36
0
def getAccountActivity(*args, **kwargs):
    """Return weekly email activity for the data set or for given accounts.

    The first positional argument (URL-unquoted) must be non-empty, otherwise
    a ``tangelo.HTTPStatusCode(400, ...)`` is returned; its value is not used
    further.  Without email addresses a single data-set-wide entry is built;
    with addresses there is one entry per address, each passing the address
    to ``get_email_activity`` as its third positional argument.

    Returns ``{"account_activity_list": [entry, ...]}``.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAccountActivity(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, _size = parseParamDatetime(**kwargs)

    if not urllib.unquote(nth(args, 0, '')):
        return tangelo.HTTPStatusCode(400, "invalid service call - missing account_type")

    addresses = parseParamEmailAddress(**kwargs)
    bounds = (start_datetime, end_datetime)

    def _entry(account_id, activities):
        # One element of "account_activity_list"; key order matches the
        # original literal.
        return {
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activities,
        }

    if not addresses:
        overall = get_email_activity(
            data_set_id, data_set_id, date_bounds=bounds, interval="week")
        return {"account_activity_list": [_entry(data_set_id, overall)]}

    entries = []
    for address in addresses:
        per_account = get_email_activity(
            data_set_id, data_set_id, address, date_bounds=bounds, interval="week")
        entries.append(_entry(address, per_account))

    return {"account_activity_list": entries}
Ejemplo n.º 37
0
def getAttachFileType(*args, **kwargs):
    """Return the most common attachment file types for the data set.

    The parsed ``size`` value is used both as ``num_top_attachments`` and as
    the slice limit on the result.  The attachment type from the first
    positional argument and the email addresses from the keyword arguments
    are parsed but do not currently influence the query.

    Returns a dict with ``account_id``, ``data_set_id``, the date bounds and
    the list of file types under ``types``.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachFileType(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    limit = int(size)

    # hack for now: an empty type falls back to 'all'; the value is not
    # consulted below.
    attach_type = urllib.unquote(nth(args, 0, '')) or 'all'

    # Still parsed so the helper runs exactly as before, although both the
    # with-addresses and without-addresses cases issue the same query.
    parseParamEmailAddress(**kwargs)

    #TODO: implement populating the attachment file-types under individual email-accounts; simulate result for now
    top_types = get_top_attachment_types(
        data_set_id,
        date_bounds=(start_datetime, end_datetime),
        num_top_attachments=limit)

    return {
        "account_id": data_set_id,
        "data_set_id": data_set_id,
        "account_start_datetime": start_datetime,
        "account_end_datetime": end_datetime,
        "types": top_types[:limit],
    }
Ejemplo n.º 38
0
def getRankedEmails(*args, **kwargs):
    """Return the ranked email addresses for the requested data set.

    The data-set id, date bounds and size parsed from the keyword arguments
    are forwarded unchanged to
    ``get_ranked_email_address_from_email_addrs_index``, whose result is
    returned as-is.
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedEmails(args: %s kwargs: %s)" % (str(args), str(kwargs)))

    query_params = parseParamDatetime(**kwargs)
    # Unpack explicitly so a result of the wrong arity still fails here.
    data_set_id, start_datetime, end_datetime, size = query_params

    ranked = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)
    return ranked
Ejemplo n.º 39
0
def exportMany(*args, **kwargs):
    """Export the emails named in the request as an archive.

    Passes the data-set id and the email ids parsed from the keyword
    arguments to ``export_emails_archive`` and returns its result.
    """
    # Only the data-set id is needed; the date bounds and size are ignored.
    data_set_id, _start, _end, _size = parseParamDatetime(**kwargs)
    return export_emails_archive(data_set_id, parseParamEmailIds(**kwargs))
Ejemplo n.º 40
0
def exportMany(*args, **kwargs):
    """Build an export archive for the email ids given in the request.

    The data-set id comes from ``parseParamDatetime`` and the ids from
    ``parseParamEmailIds``; the value returned by ``export_emails_archive``
    is handed back unchanged.
    """
    parsed = parseParamDatetime(**kwargs)
    # Four values are expected; all but the data-set id are unused.
    data_set_id, _start_datetime, _end_datetime, _size = parsed

    selected_ids = parseParamEmailIds(**kwargs)
    archive = export_emails_archive(data_set_id, selected_ids)
    return archive