def search_email_by_topic(*args, **param_args):
    """Service endpoint: look up emails for a topic.

    Reads the data set id and date bounds via ``parseParamDatetime``, then the
    topic, email-address filter and text query from ``param_args``, and hands
    them to ``es_get_all_email_by_topic``.

    Returns a ``tangelo.HTTPStatusCode`` 400 when ``data_set_id`` or
    ``topic_index`` is missing/falsy; otherwise the result of
    ``es_get_all_email_by_topic``.
    """
    tangelo.content_type("application/json")
    call_description = "search_email_by_topic(args: %s kwargs: %s)" % (
        str(args), str(param_args))
    tangelo.log(call_description)

    # The size parsed here is not used; it is replaced just below.
    data_set_id, start_datetime, end_datetime, _ = parseParamDatetime(
        **param_args)

    # TODO: set from UI
    size = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")
    if not param_args.get("topic_index"):
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing topic_index")

    # Keyword arguments are evaluated left to right, so the parsers run in
    # the order topic -> email addresses -> text query.
    return es_get_all_email_by_topic(
        data_set_id,
        topic=parseParamTopic(**param_args),
        email_addrs=parseParam_email_addr(**param_args),
        qs=parseParamTextQuery(**param_args),
        start_datetime=start_datetime,
        end_datetime=end_datetime,
        size=size)
def get_graph_for_entity(*args, **kwargs):
    """Service endpoint: build the email graph for an entity filter.

    Parses data set id, date bounds, size, email addresses, entity filter and
    text query from ``kwargs``; runs one email query to build the graph and a
    second, attachments-only query with the same filters.

    Returns the graph produced by ``_build_graph_for_emails`` with an added
    ``"attachments"`` entry.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    addresses = parseParamEmailAddress(**kwargs)
    entities = parseParamEntity(**kwargs)

    # TODO set from UI
    if not size > 500:
        size = 2500

    qs = parseParamTextQuery(**kwargs)

    # Filters common to the email query and the attachment query.
    shared_filters = dict(
        email_addrs=addresses,
        qs=qs,
        entity=entities,
        date_bounds=(start_datetime, end_datetime))

    email_query = _build_email_query(**shared_filters)
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (email_query))

    found = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Attachments matching the same filters
    attachment_query = _build_email_query(attachments_only=True,
                                          **shared_filters)
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" %
                (attachment_query))
    graph["attachments"] = _query_email_attachments(data_set_id, size,
                                                    attachment_query)
    return graph
def search_email_by_community(*args, **param_args):
    """Service endpoint: look up emails for a community.

    The community is taken from the first positional (path) argument; the
    data set id, date bounds, email-address filter and text query come from
    ``param_args``.

    Returns a ``tangelo.HTTPStatusCode`` 400 when ``data_set_id`` or the
    community is missing/falsy; otherwise the result of
    ``es_get_all_email_by_community``.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_community(args: %s kwargs: %s)" %
                (str(args), str(param_args)))

    # The size parsed here is not used; it is replaced just below.
    data_set_id, start_datetime, end_datetime, _ = parseParamDatetime(
        **param_args)
    community = nth(args, 0, '')

    # TODO: set from UI
    size = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")

    if not community:
        # The message previously said "missing sender", which did not match
        # the argument actually being validated here.
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing community")

    email_addrs = parseParam_email_addr(**param_args)
    qs = parseParamTextQuery(**param_args)

    return es_get_all_email_by_community(data_set_id, community, email_addrs,
                                         qs, start_datetime, end_datetime,
                                         size)
def get_graph_for_entity(*args, **kwargs):
    """Service endpoint: return the email graph for the requested entities.

    Two queries are issued with identical filters (email addresses, text
    query, entity filter, date bounds): one for the emails that make up the
    graph, one restricted to attachments. The attachment results are stored
    under the graph's ``"attachments"`` key before the graph is returned.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    entity_dict = parseParamEntity(**kwargs)

    # TODO set from UI
    if size > 500:
        limit = size
    else:
        limit = 2500

    qs = parseParamTextQuery(**kwargs)
    bounds = (start_datetime, end_datetime)

    # --- emails -> graph ---
    graph_query = _build_email_query(email_addrs=email_address_list,
                                     qs=qs,
                                     entity=entity_dict,
                                     date_bounds=bounds)
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (graph_query))
    email_results = _query_emails(data_set_id, limit, graph_query)
    hits = email_results["hits"]
    total = email_results["total"]
    graph = _build_graph_for_emails(data_set_id, hits, total)

    # --- attachments for the same filters ---
    attachments_query = _build_email_query(email_addrs=email_address_list,
                                           qs=qs,
                                           entity=entity_dict,
                                           date_bounds=bounds,
                                           attachments_only=True)
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" %
                (attachments_query))
    graph["attachments"] = _query_email_attachments(data_set_id, limit,
                                                    attachments_query)

    return graph
def exif_emails(*args, **kwargs):
    """Service endpoint: return the result of ``es_get_exif_emails``.

    Only the data set id and size parsed from ``kwargs`` are forwarded to
    ``es_get_exif_emails``.
    """
    tangelo.log("geo.exif_emails(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    # Date bounds are parsed but not used by this endpoint.
    data_set_id, _start, _end, size = parseParamDatetime(**kwargs)

    # NOTE(review): the text query is parsed but never passed on; the call is
    # kept in case the parser has side effects -- confirm before removing.
    parseParamTextQuery(**kwargs)

    return es_get_exif_emails(data_set_id, size)
def getRankedAddressesWithTextSearch(*args, **kwargs):
    """Service endpoint: text-search summary plus one summary per ranked address.

    Runs the text query once for the whole data set, then fetches the ranked
    email addresses and runs the per-address query for each of them.

    Returns ``{"text_search_list": {...}}`` where the inner dict carries the
    overall search counts, the overall graph under ``"TEMPORARY_GRAPH"`` and a
    ``"top_address_list"`` with one entry per ranked address.
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    qs = parseParamTextQuery(**kwargs)

    # TODO this needs to come from UI
    if not size > 500:
        size = 2500

    overall_graph = get_top_email_by_text_query(data_set_id, qs,
                                                start_datetime, end_datetime,
                                                size)
    overall_counts = {
        "mail_sent_count": "N/A",
        "mail_received_count": "N/A",
        "mail_attachment_count": len(overall_graph["attachments"]),
        "query_matched_count": overall_graph["query_hits"],
        "associated_count": len(overall_graph["graph"]["nodes"]),
    }

    ranked = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)

    per_address = []
    for row in ranked["emails"]:
        # One additional query per ranked address (row[0] is the value passed
        # as the address argument).
        address_graph = es_get_all_email_by_address(data_set_id, row[0], qs,
                                                    start_datetime,
                                                    end_datetime, size)
        per_address.append({
            "address_search_url_path": row[0],
            "parameters": kwargs,
            "search_results": {
                "mail_sent_count": row[6],
                "mail_received_count": row[5],
                "mail_attachment_count": row[7],
                "query_matched_count": address_graph["query_hits"],
                "associated_count": len(address_graph["graph"]["nodes"]),
            },
            "TEMPORARY_GRAPH": address_graph,
        })

    return {
        "text_search_list": {
            "text_search_url_path": qs,
            "parameter": kwargs,
            "search_result": overall_counts,
            "TEMPORARY_GRAPH": overall_graph,
            "top_address_list": per_address,
        }
    }
def getEmail(*args, **kwargs):
    """Service endpoint: fetch a single email by id.

    The email id is the last positional (path) argument; the data set id and
    text query are parsed from ``kwargs``.

    Returns a ``tangelo.HTTPStatusCode`` 400 when no email id was supplied;
    otherwise the result of ``get_email``.
    """
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    # Date bounds and size are parsed but not used by this endpoint.
    data_set_id, _start, _end, _size = parseParamDatetime(**kwargs)
    qs = parseParamTextQuery(**kwargs)

    # With no positional arguments, args[-1] would raise IndexError and the
    # 400 response below would never be reached.
    email_id = args[-1] if args else None
    if not email_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing email_id")

    return get_email(data_set_id, email_id, qs)
def search(*path_args, **param_args):
    """Service endpoint: dispatch a search on the first path segment.

    ``path_args[0]`` selects the search field and ``path_args[1]`` carries the
    (URL-quoted) value:

    * ``"text"`` / ``"all"`` -- text query via ``get_top_email_by_text_query``;
      the path value replaces the ``qs`` parsed from ``param_args``.
    * ``"email"`` -- address query via ``es_get_all_email_by_address``.
    * ``"topic"`` / ``"community"`` -- not implemented, empty result.

    Any other field, a missing value, or no path segments at all yields the
    empty result ``{"graph": {"nodes": [], "links": []}, "rows": []}``.
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search(path_args[%s] %s)" %
                (len(path_args), str(path_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)

    # TODO this needs to come from UI
    size = size if size > 500 else 2500

    # TODO make sure that the qs param is put on the query
    qs = parseParamTextQuery(**param_args)

    # Built once per call; exactly one return statement hands it out.
    empty_result = {"graph": {"nodes": [], "links": []}, "rows": []}

    # Fewer than two segments means there is no value to search for. This
    # also covers an empty path, which previously raised IndexError on
    # path_args[0].
    if len(path_args) < 2:
        return empty_result

    # re-direct based on field
    field = path_args[0]

    if field in ("text", "all"):
        # TODO remove hacky path_args - should come from params
        qs = urllib.unquote(nth(path_args, 1, ''))
        return get_top_email_by_text_query(data_set_id, qs, start_datetime,
                                           end_datetime, size)

    if field == "email":
        # TODO remove hacky path_args - should come from params
        email_address = urllib.unquote(nth(path_args, 1, ''))
        return es_get_all_email_by_address(data_set_id, email_address, qs,
                                           start_datetime, end_datetime, size)

    # TODO implement search by topic
    # TODO implement search by community
    # "topic", "community" and unknown fields all fall through to the empty
    # result.
    return empty_result
def getEmail(*args, **kwargs):
    """Service endpoint: return one email, identified by the last path argument.

    ``kwargs`` supplies the data set id (via ``parseParamDatetime``) and the
    text query (via ``parseParamTextQuery``), both of which are forwarded to
    ``get_email`` together with the email id.

    Returns a ``tangelo.HTTPStatusCode`` 400 if the email id is absent or
    empty.
    """
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    qs = parseParamTextQuery(**kwargs)

    # Guard the lookup: indexing an empty args tuple raises IndexError, which
    # would bypass the explicit "missing email_id" response.
    if not args or not args[-1]:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing email_id")

    email_id = args[-1]
    return get_email(data_set_id, email_id, qs)
def getRankedAddressesWithTextSearch(*args, **kwargs):
    """Service endpoint: combine a text search with per-address searches.

    First the text query is run for the whole data set and summarised; then,
    for every entry in the ranked address index, the per-address query is run
    and summarised. Everything is returned under ``"text_search_list"``.
    """
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    qs = parseParamTextQuery(**kwargs)

    # TODO this needs to come from UI
    size = size if size > 500 else 2500

    def summarize_address(ranked_row):
        # Runs the per-address query and shapes one "top_address_list" entry.
        address = ranked_row[0]
        graph = es_get_all_email_by_address(data_set_id, address, qs,
                                            start_datetime, end_datetime,
                                            size)
        return {
            "address_search_url_path": address,
            "parameters": kwargs,
            "search_results": {
                "mail_sent_count": ranked_row[6],
                "mail_received_count": ranked_row[5],
                "mail_attachment_count": ranked_row[7],
                "query_matched_count": graph["query_hits"],
                "associated_count": len(graph["graph"]["nodes"]),
            },
            "TEMPORARY_GRAPH": graph,
        }

    text_search_graph = get_top_email_by_text_query(data_set_id, qs,
                                                    start_datetime,
                                                    end_datetime, size)
    text_search = {
        "text_search_url_path": qs,
        "parameter": kwargs,
        "search_result": {
            "mail_sent_count": "N/A",
            "mail_received_count": "N/A",
            "mail_attachment_count": len(text_search_graph["attachments"]),
            "query_matched_count": text_search_graph["query_hits"],
            "associated_count": len(text_search_graph["graph"]["nodes"]),
        },
        "TEMPORARY_GRAPH": text_search_graph,
    }

    ranked_addresses = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)
    text_search["top_address_list"] = [
        summarize_address(row) for row in ranked_addresses["emails"]
    ]

    return {"text_search_list": text_search}
def get_top_entities(*args, **kwargs):
    """Service endpoint: return the top entities from the entity histogram.

    The first positional (path) argument is the number of entities to return
    (defaults to ``"20"``; it is URL-unquoted and converted with ``int``).
    When an email address list is present in ``kwargs`` it is passed to
    ``get_entity_histogram`` as an extra positional argument.

    Returns ``{"entities": [[index, type, key, doc_count], ...]}`` where
    ``index`` is the stringified position in the truncated histogram.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    top_count = int(urllib.unquote(nth(args, 0, "20")))

    # size is parsed but not used by this endpoint.
    data_set_id, start_datetime, end_datetime, _size = parseParamDatetime(
        **kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)
    date_bounds = (start_datetime, end_datetime)

    # TODO qs not being evaluated in inner filter called by this method
    if email_address_list:
        histogram = get_entity_histogram(data_set_id, "emails",
                                         email_address_list,
                                         qs=qs, date_bounds=date_bounds)
    else:
        histogram = get_entity_histogram(data_set_id, "emails",
                                         qs=qs, date_bounds=date_bounds)

    rows = []
    for position, entity in enumerate(histogram[:top_count]):
        rows.append([str(position), entity["type"], entity["key"],
                     entity["doc_count"]])

    return {"entities": rows}
def get_top_entities(*args, **kwargs):
    """Service endpoint: list the highest-ranked entities.

    ``args[0]`` (default ``"20"``) gives how many histogram entries to keep.
    The histogram is requested from ``get_entity_histogram`` for the
    ``"emails"`` type, filtered by text query and date bounds, and by the
    email address list when one was supplied.

    Returns a dict with a single ``"entities"`` key holding one
    ``[str(index), type, key, doc_count]`` list per kept entry.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    raw_count = nth(args, 0, "20")
    top_count = int(urllib.unquote(raw_count))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)

    # The address list is only passed (positionally) when it is non-empty, so
    # the helper's own default applies otherwise.
    address_args = (email_address_list,) if email_address_list else ()

    # TODO qs not being evaluated in inner filter called by this method
    entities = get_entity_histogram(
        data_set_id, "emails", *address_args,
        qs=qs, date_bounds=(start_datetime, end_datetime))[:top_count]

    return {
        "entities": [
            [str(i), entity["type"], entity["key"], entity["doc_count"]]
            for i, entity in enumerate(entities)
        ]
    }