Exemple #1
0
def search_email_by_topic(*args, **param_args):
    """Service endpoint: return the emails matching a topic.

    The data set id and date bounds come from parseParamDatetime; the topic,
    email-address and text-query filters are parsed from the same request
    parameters and handed to es_get_all_email_by_topic.

    Responds with an HTTP 400 status when the data set id is missing or when
    the ``topic_index`` request parameter is absent or empty.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_topic(args: %s kwargs: %s)" %
                (str(args), str(param_args)))

    # The size returned by the parser is discarded: it is replaced below by
    # the raw 'size' request parameter (or 2500 when that is not supplied).
    data_set_id, start_datetime, end_datetime, _ = parseParamDatetime(
        **param_args)

    # TODO: set from UI
    result_limit = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")

    if not param_args.get("topic_index"):
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing topic_index")

    # Parse the filters in the same order as before, then forward them all
    # as keyword arguments.
    search_kwargs = {}
    search_kwargs["topic"] = parseParamTopic(**param_args)
    search_kwargs["email_addrs"] = parseParam_email_addr(**param_args)
    search_kwargs["qs"] = parseParamTextQuery(**param_args)
    search_kwargs["start_datetime"] = start_datetime
    search_kwargs["end_datetime"] = end_datetime
    search_kwargs["size"] = result_limit

    return es_get_all_email_by_topic(data_set_id, **search_kwargs)
Exemple #2
0
def get_graph_for_entity(*args, **kwargs):
    """Service endpoint: build the email graph for an entity search.

    Two queries are issued with identical filters (email addresses, text
    query, entity, date bounds): one for the emails themselves, used to build
    the graph, and one restricted to attachments. The attachment results are
    stored on the returned graph under the "attachments" key.

    Returns the graph produced by _build_graph_for_emails.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    entity_dict = parseParamEntity(**kwargs)
    # TODO set from UI
    # Any parsed size of 500 or less is replaced by 2500.
    size = size if size > 500 else 2500

    qs = parseParamTextQuery(**kwargs)

    # Both queries share every filter; only attachments_only differs.
    query_filters = dict(email_addrs=email_address_list,
                         qs=qs,
                         entity=entity_dict,
                         date_bounds=(start_datetime, end_datetime))

    query = _build_email_query(**query_filters)
    # Format with a 1-tuple so the log call cannot fail on a tuple-valued
    # query.
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (query,))

    results = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, results["hits"],
                                    results["total"])

    # Get attachments for the same entity search
    attachment_query = _build_email_query(attachments_only=True,
                                          **query_filters)
    tangelo.log("entity.get_graph_for_entity(attachment-query: %s)" %
                (attachment_query,))
    graph["attachments"] = _query_email_attachments(data_set_id, size,
                                                    attachment_query)

    return graph
Exemple #3
0
def search_email_by_community(*args, **param_args):
    """Service endpoint: return the emails matching a community.

    The community is read from the first positional (path) argument; the
    data set id and date bounds come from parseParamDatetime, and the
    email-address and text-query filters from the request parameters.

    Responds with an HTTP 400 status when the data set id or the community
    is missing; otherwise returns the result of
    es_get_all_email_by_community.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_community(args: %s kwargs: %s)" %
                (str(args), str(param_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)
    community = nth(args, 0, '')

    # TODO: set from UI
    # The parsed size is replaced by the raw 'size' request parameter
    # (or 2500 when that is not supplied).
    size = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")
    if not community:
        # The value being validated is the community, so the error message
        # names it (it previously said "sender").
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing community")

    email_addrs = parseParam_email_addr(**param_args)

    qs = parseParamTextQuery(**param_args)

    return es_get_all_email_by_community(data_set_id, community, email_addrs,
                                         qs, start_datetime, end_datetime,
                                         size)
Exemple #4
0
def get_graph_for_entity(*args, **kwargs):
    """Service endpoint: build the email graph for an entity search.

    Runs the email query to build the graph, then re-runs the same query
    restricted to attachments and stores that result on the graph under the
    "attachments" key.

    Returns the graph produced by _build_graph_for_emails.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" % (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    entity_dict = parseParamEntity(**kwargs)
    # TODO set from UI
    # Any parsed size of 500 or less is replaced by 2500.
    size = size if size > 500 else 2500

    qs = parseParamTextQuery(**kwargs)

    def build_query(**extra):
        # Same filters for every query; callers add only what differs.
        return _build_email_query(email_addrs=email_address_list, qs=qs, entity=entity_dict,
                                  date_bounds=(start_datetime, end_datetime), **extra)

    query = build_query()
    # A 1-tuple keeps the %-format safe whatever type the query object has.
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (query,))

    results = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, results["hits"], results["total"])

    # Get attachments for the same entity search
    query = build_query(attachments_only=True)
    tangelo.log("entity.get_graph_for_entity(attachment-query: %s)" % (query,))
    attachments = _query_email_attachments(data_set_id, size, query)
    graph["attachments"] = attachments

    return graph
Exemple #5
0
def exif_emails(*args, **kwargs):
    """Service endpoint: return the result of es_get_exif_emails.

    Only the data set id and the size from parseParamDatetime are forwarded;
    the date bounds and the text query are parsed but not used by this
    handler.
    """
    tangelo.log("geo.exif_emails(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, _start_datetime, _end_datetime, result_limit = \
        parseParamDatetime(**kwargs)

    # Still parsed, as before, although the value is not passed on.
    _unused_qs = parseParamTextQuery(**kwargs)

    return es_get_exif_emails(data_set_id, result_limit)
Exemple #6
0
def getRankedAddressesWithTextSearch(*args, **kwargs):
    """Service endpoint: text-search summary plus a per-address breakdown.

    Runs the text query once for the whole data set, then once more for each
    ranked email address, and returns everything under the
    "text_search_list" key. Each entry carries its counts and the full graph
    it was computed from (under "TEMPORARY_GRAPH").
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    qs = parseParamTextQuery(**kwargs)

    # TODO this needs to come from UI
    if not size > 500:
        size = 2500

    query_graph = get_top_email_by_text_query(data_set_id, qs,
                                              start_datetime, end_datetime,
                                              size)

    overall_counts = {
        "mail_sent_count": "N/A",
        "mail_received_count": "N/A",
        "mail_attachment_count": len(query_graph["attachments"]),
        "query_matched_count": query_graph["query_hits"],
        "associated_count": len(query_graph["graph"]["nodes"])
    }
    text_search = {
        "text_search_url_path": qs,
        "parameter": kwargs,
        "search_result": overall_counts,
        "TEMPORARY_GRAPH": query_graph
    }

    ranked = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)

    per_address = []
    for row in ranked["emails"]:
        # One extra query per ranked address; row[0] is used as the address.
        address = row[0]
        address_graph = es_get_all_email_by_address(data_set_id, address, qs,
                                                    start_datetime,
                                                    end_datetime, size)
        per_address.append({
            "address_search_url_path": address,
            "parameters": kwargs,
            "search_results": {
                # Row positions 6, 5 and 7 are reported as the sent,
                # received and attachment counts respectively.
                "mail_sent_count": row[6],
                "mail_received_count": row[5],
                "mail_attachment_count": row[7],
                "query_matched_count": address_graph["query_hits"],
                "associated_count": len(address_graph["graph"]["nodes"])
            },
            "TEMPORARY_GRAPH": address_graph
        })
    text_search["top_address_list"] = per_address

    return {"text_search_list": text_search}
Exemple #7
0
def getEmail(*args, **kwargs):
    """Service endpoint: fetch a single email by id.

    The email id is the last positional (path) argument. Responds with an
    HTTP 400 status when no id is supplied or it is empty; otherwise returns
    the result of get_email for the data set, id and text query.
    """
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    # Only the data set id is needed here; dates and size are ignored.
    data_set_id, _start_datetime, _end_datetime, _size = parseParamDatetime(**kwargs)

    qs = parseParamTextQuery(**kwargs)

    # With no positional arguments, args[-1] would raise IndexError before
    # the 400 response could be returned, so guard the lookup.
    email_id = args[-1] if args else None
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email_id")

    return get_email(data_set_id, email_id, qs)
Exemple #8
0
def search(*path_args, **param_args):
    """Service endpoint: dispatch a search on the first path segment.

    path_args[0] selects the search field and path_args[1] (URL-unquoted)
    is the value searched for:

    * "text" / "all" -- the value replaces the parsed text query and is
      passed to get_top_email_by_text_query.
    * "email" -- the value is the address passed to
      es_get_all_email_by_address, together with the parsed text query.

    Every other case (no path segments, a field without a value, "topic",
    "community", or an unknown field) returns an empty graph/rows result.
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search(path_args[%s] %s)" %
                (len(path_args), str(path_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)

    # TODO this needs to come from UI
    # Any parsed size of 500 or less is replaced by 2500.
    size = size if size > 500 else 2500

    # TODO make sure that the qs param is put on the query
    qs = parseParamTextQuery(**param_args)

    empty_result = {"graph": {"nodes": [], "links": []}, "rows": []}

    # A field with no value has nothing to search for. This also covers an
    # empty path, where indexing path_args[0] would raise IndexError.
    if len(path_args) < 2:
        return empty_result

    field = path_args[0]

    # re-direct based on field
    # TODO remove hacky path_args - should come from params
    if field in ("text", "all"):
        qs = urllib.unquote(path_args[1])
        return get_top_email_by_text_query(data_set_id, qs, start_datetime,
                                           end_datetime, size)
    if field == "email":
        email_address = urllib.unquote(path_args[1])
        return es_get_all_email_by_address(data_set_id, email_address, qs,
                                           start_datetime, end_datetime,
                                           size)

    # TODO implement search by topic and search by community; until then
    # they fall through to the empty result like any unknown field.
    return empty_result
Exemple #9
0
def getEmail(*args, **kwargs):
    """Service endpoint: fetch a single email by id.

    The last positional (path) argument is the email id. A missing or empty
    id yields an HTTP 400 status; otherwise the result of get_email for the
    data set, id and text query is returned.
    """
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    qs = parseParamTextQuery(**kwargs)

    # Guard against an empty args tuple: args[-1] would raise IndexError
    # and the caller would never receive the 400 below.
    if not args or not args[-1]:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing email_id")
    email_id = args[-1]

    return get_email(data_set_id, email_id, qs)
Exemple #10
0
def getRankedAddressesWithTextSearch(*args, **kwargs):
    """Service endpoint: text-search summary plus a per-address breakdown.

    The text query is run once over the data set and once per ranked email
    address. The combined structure is returned under "text_search_list",
    with each part carrying its counts and its source graph
    ("TEMPORARY_GRAPH").
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    qs = parseParamTextQuery(**kwargs)

    # TODO this needs to come from UI
    size = 2500 if not size > 500 else size

    def address_entry(ranked_row):
        # Build the summary for one ranked address (ranked_row[0]); this
        # issues one query per address.
        address_graph = es_get_all_email_by_address(data_set_id, ranked_row[0], qs, start_datetime, end_datetime, size)
        return {
            "address_search_url_path": ranked_row[0],
            "parameters": kwargs,
            "search_results": {
                "mail_sent_count": ranked_row[6],
                "mail_received_count": ranked_row[5],
                "mail_attachment_count": ranked_row[7],
                "query_matched_count": address_graph["query_hits"],
                "associated_count": len(address_graph["graph"]["nodes"])
            },
            "TEMPORARY_GRAPH": address_graph
        }

    overall_graph = get_top_email_by_text_query(data_set_id, qs, start_datetime, end_datetime, size)

    text_search = {
        "text_search_url_path": qs,
        "parameter": kwargs,
        "search_result": {
            "mail_sent_count": "N/A",
            "mail_received_count": "N/A",
            "mail_attachment_count": len(overall_graph["attachments"]),
            "query_matched_count": overall_graph["query_hits"],
            "associated_count": len(overall_graph["graph"]["nodes"])
        },
        "TEMPORARY_GRAPH": overall_graph
    }

    ranked_addresses = get_ranked_email_address_from_email_addrs_index(data_set_id, start_datetime, end_datetime, size)
    text_search["top_address_list"] = [address_entry(row) for row in ranked_addresses["emails"]]

    return {"text_search_list": text_search}
Exemple #11
0
def get_top_entities(*args, **kwargs):
    """Service endpoint: return the top entities of the entity histogram.

    The first positional (path) argument, URL-unquoted and converted to int,
    limits how many entities are returned (default "20"). When email
    addresses are supplied in the request they are passed to
    get_entity_histogram as an extra positional argument.

    Returns {"entities": [[index-as-string, type, key, doc_count], ...]}.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    top_count = int(urllib.unquote(nth(args, 0, "20")))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)

    # The address list is only passed on when it is non-empty; everything
    # else about the histogram call is the same in both cases.
    histogram_args = [data_set_id, "emails"]
    if email_address_list:
        histogram_args.append(email_address_list)

    # TODO qs not being evaluated in inner filter called by this method
    histogram = get_entity_histogram(*histogram_args,
                                     qs=qs,
                                     date_bounds=(start_datetime,
                                                  end_datetime))
    top_entities = histogram[:top_count]

    rows = []
    for position, entity in enumerate(top_entities):
        rows.append([
            str(position),
            entity["type"],
            entity["key"],
            entity["doc_count"],
        ])

    return {"entities": rows}
Exemple #12
0
def get_top_entities(*args, **kwargs):
    """Service endpoint: return the top entities of the entity histogram.

    The number of entities returned is taken from the first positional
    (path) argument (URL-unquoted, converted to int, default "20"). The
    histogram is optionally restricted to the email addresses parsed from
    the request.

    Returns {"entities": [[index-as-string, type, key, doc_count], ...]}.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    limit = int(urllib.unquote(nth(args, 0, "20")))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)

    date_bounds = (start_datetime, end_datetime)

    # TODO qs not being evaluated in inner filter called by this method
    if email_address_list:
        histogram = get_entity_histogram(data_set_id, "emails", email_address_list, qs=qs, date_bounds=date_bounds)
    else:
        histogram = get_entity_histogram(data_set_id, "emails", qs=qs, date_bounds=date_bounds)

    # Both cases produce rows of the same shape, so they are built once.
    entity_rows = [
        [str(index), item["type"], item["key"], item["doc_count"]]
        for index, item in enumerate(histogram[:limit])
    ]

    return {"entities": entity_rows}