def topic_list(*args, **kwargs):
    """Return the topic categories for the selected data set as JSON."""
    # First path segment is accepted as a category but is not passed to
    # get_categories - NOTE(review): confirm whether filtering was intended.
    category = nth(args, 0, 'all')
    #tangelo.log("category %s" %(category))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    tangelo.content_type("application/json")
    return get_categories(data_set_id)
def get_graph_for_entity(*args, **kwargs):
    """Build the email graph (and matching attachments) for an entity-scoped query."""
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    addr_list = parseParamEmailAddress(**kwargs)
    entity = parseParamEntity(**kwargs)
    # TODO set from UI
    if size <= 500:
        size = 2500
    qs = parseParamTextQuery(**kwargs)
    date_bounds = (start_datetime, end_datetime)

    query = _build_email_query(email_addrs=addr_list, qs=qs, entity=entity,
                               date_bounds=date_bounds)
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (query))
    results = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, results["hits"], results["total"])

    # Get attachments for community
    query = _build_email_query(email_addrs=addr_list, qs=qs, entity=entity,
                               date_bounds=date_bounds, attachments_only=True)
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)
    return graph
def searchStarred(*args, **kwargs):
    """Return the email graph (and attachments) for all starred emails."""
    tangelo.log("email.searchStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # TODO set from UI
    if size <= 500:
        size = 2500
    query_terms = ''
    email_address_list = []
    date_bounds = (start_datetime, end_datetime)

    query = _build_email_query(email_addrs=email_address_list, qs=query_terms,
                               date_bounds=date_bounds, starred=True)
    tangelo.log("email.searchStarred(query: %s)" % (query))
    results = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, results["hits"], results["total"])

    # Get attachments for community
    query = _build_email_query(email_addrs=email_address_list, qs=query_terms,
                               date_bounds=date_bounds,
                               attachments_only=True, starred=True)
    tangelo.log("email.searchStarred(attachment-query: %s)" % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)
    return graph
def listAllDataSet():
    """List all known data sets plus the top-ranked email addresses of the default set."""
    tangelo.log("datasource.listAllDataSet()")
    # Ignore index keys in ES that are not in the newman_app.conf
    # Find all the indexes that begin with the index loader prefix
    known_names = data_set_names()
    loader_prefix = index_creator_prefix()
    indexes = [_index_record(index) for index in index_list()
               if index in known_names or index.startswith(loader_prefix)]
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime()
    ranked = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)["emails"]
    # Key each ranked tuple by its address (element 0).
    email_addrs = dict((addr[0], addr) for addr in ranked)
    return {
        "data_set_selected": getDefaultDataSetID(),
        "data_sets": indexes,
        "top_hits": {
            "order_by": "rank",
            "email_addrs": email_addrs
        }
    }
def searchStarred(*args, **kwargs):
    """Return the email graph (and attachments) for all starred emails."""
    tangelo.log("email.searchStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    size = size if size > 500 else 2500  # TODO set from UI
    query_terms = ''
    email_address_list = []

    starred_query = _build_email_query(
        email_addrs=email_address_list, qs=query_terms,
        date_bounds=(start_datetime, end_datetime), starred=True)
    tangelo.log("email.searchStarred(query: %s)" % (starred_query))
    results = _query_emails(data_set_id, size, starred_query)
    graph = _build_graph_for_emails(data_set_id, results["hits"], results["total"])

    # Get attachments for community
    attachment_query = _build_email_query(
        email_addrs=email_address_list, qs=query_terms,
        date_bounds=(start_datetime, end_datetime),
        attachments_only=True, starred=True)
    tangelo.log("email.searchStarred(attachment-query: %s)" % (attachment_query))
    attachments = _query_email_attachments(data_set_id, size, attachment_query)
    graph["attachments"] = attachments
    return graph
def getRankedAddresses(*args, **kwargs):
    """Return the top-ranked addresses, each with a per-address graph summary."""
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # TODO - reminder no 'qs' here set to ''
    # qs = parseParamTextQuery(**kwargs)
    qs = ''
    # TODO this needs to come from UI
    if size <= 500:
        size = 2500
    ranked_addresses = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)
    top_address_list = []
    for email_address in ranked_addresses["emails"]:
        graph = es_get_all_email_by_address(data_set_id, email_address[0], qs,
                                            start_datetime, end_datetime, size)
        top_address_list.append({
            "address_search_url_path": email_address[0],
            "parameters": kwargs,
            "search_results": {
                # Positional fields of the ranked-address tuple; indices per
                # the email_addrs index schema - TODO confirm
                "mail_sent_count": email_address[6],
                "mail_received_count": email_address[5],
                "mail_attachment_count": email_address[7],
                "query_matched_count": graph["query_hits"],
                "associated_count": len(graph["graph"]["nodes"])
            },
            "TEMPORARY_GRAPH": graph
        })
    return {"top_address_list": top_address_list}
def getAttachFileType(*args, **kwargs):
    """Return the top attachment file types for the data set.

    The first path segment is an attachment type (defaults to 'all'); it is
    currently unused by the query.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachFileType(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    top_count = int(size)
    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        attach_type = 'all'  # hack for now
    email_address_list = parseParamEmailAddress(**kwargs)
    # Both branches of the original were byte-identical, so they are
    # collapsed into one call.
    # TODO: implement populating the attachment file-types under individual
    # email-accounts (email_address_list is currently ignored).
    file_types = get_top_attachment_types(
        data_set_id,
        date_bounds=(start_datetime, end_datetime),
        num_top_attachments=top_count)[:top_count]
    return {
        "account_id": data_set_id,
        "data_set_id": data_set_id,
        "account_start_datetime": start_datetime,
        "account_end_datetime": end_datetime,
        "types": file_types
    }
def search_email_by_conversation(*path_args, **param_args):
    """Return the conversation thread anchored at a specific document."""
    tangelo.content_type("application/json")
    tangelo.log("search.search_email_by_conversation(path_args[%s] %s)" % (len(path_args), str(path_args)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**param_args)
    # TODO: set from UI
    # NOTE(review): if 'size' arrives as a string, the size / 2 below will
    # fail - confirm upstream coercion.
    size = param_args.get('size', 2500)
    # parse the sender address and the recipient address
    sender_list = parseParamEmailSender(**param_args)
    cherrypy.log("\tsender_list: %s)" % str(sender_list))
    recipient_list = parseParamEmailRecipient(**param_args)
    cherrypy.log("\trecipient_list: %s)" % str(recipient_list))
    document_uid = parseParamDocumentUID(**param_args)
    cherrypy.log("\tdocument_uid: %s)" % str(document_uid))
    document_datetime = parseParamDocumentDatetime(**param_args)
    cherrypy.log("\tdocument_datetime: %s)" % str(document_datetime))
    if not document_datetime:
        return tangelo.HTTPStatusCode(
            400,
            "invalid service call - missing mandatory param 'document_datetime'")
    sender_address, recipient_address = parseParamAllSenderAllRecipient(**param_args)
    return es_get_conversation(data_set_id, sender_address, recipient_address,
                               start_datetime, end_datetime, size / 2,
                               document_uid, document_datetime)
def getRankedAddresses(*args, **kwargs):
    """Return the top-ranked addresses, each with a per-address graph summary."""
    tangelo.content_type("application/json")
    tangelo.log("getRankedAddresses(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # TODO - reminder no 'qs' here set to ''
    # qs = parseParamTextQuery(**kwargs)
    qs = ''
    size = size if size > 500 else 2500  # TODO this needs to come from UI
    ranked = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)

    def summarize(addr_tuple):
        # Build one top_address_list record for a ranked-address tuple.
        graph = es_get_all_email_by_address(data_set_id, addr_tuple[0], qs,
                                            start_datetime, end_datetime, size)
        return {
            "address_search_url_path": addr_tuple[0],
            "parameters": kwargs,
            "search_results": {
                "mail_sent_count": addr_tuple[6],
                "mail_received_count": addr_tuple[5],
                "mail_attachment_count": addr_tuple[7],
                "query_matched_count": graph["query_hits"],
                "associated_count": len(graph["graph"]["nodes"])
            },
            "TEMPORARY_GRAPH": graph
        }

    return {"top_address_list": [summarize(a) for a in ranked["emails"]]}
def search_email_by_community(*args, **param_args):
    """Return all emails belonging to a named community.

    The first path segment is the community name; returns 400 when either the
    data set or the community is missing.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_community(args: %s kwargs: %s)" % (str(args), str(param_args)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**param_args)
    community = nth(args, 0, '')
    # TODO: set from UI
    size = param_args.get('size', 2500)
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing data_set_id")
    if not community:
        # Fixed copy/paste: message previously said "missing sender".
        return tangelo.HTTPStatusCode(400, "invalid service call - missing community")
    email_addrs = parseParam_email_addr(**param_args)
    qs = parseParamTextQuery(**param_args)
    return es_get_all_email_by_community(data_set_id, community, email_addrs, qs,
                                         start_datetime, end_datetime, size)
def search_email_by_topic(*args, **param_args):
    """Return all emails scored against the requested topic."""
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_topic(args: %s kwargs: %s)" % (str(args), str(param_args)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**param_args)
    # TODO: set from UI
    size = param_args.get('size', 2500)
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing data_set_id")
    if not param_args.get("topic_index"):
        return tangelo.HTTPStatusCode(400, "invalid service call - missing topic_index")
    topic = parseParamTopic(**param_args)
    email_addrs = parseParam_email_addr(**param_args)
    qs = parseParamTextQuery(**param_args)
    return es_get_all_email_by_topic(data_set_id,
                                     topic=topic,
                                     email_addrs=email_addrs,
                                     qs=qs,
                                     start_datetime=start_datetime,
                                     end_datetime=end_datetime,
                                     size=size)
def get_graph_for_entity(*args, **kwargs):
    """Build the email graph (and matching attachments) for an entity-scoped query."""
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    entity_dict = parseParamEntity(**kwargs)
    size = size if size > 500 else 2500  # TODO set from UI
    qs = parseParamTextQuery(**kwargs)

    email_query = _build_email_query(
        email_addrs=email_address_list, qs=qs, entity=entity_dict,
        date_bounds=(start_datetime, end_datetime))
    tangelo.log("entity.get_graph_for_entity(query: %s)" % (email_query))
    results = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, results["hits"], results["total"])

    # Get attachments for community
    attachment_query = _build_email_query(
        email_addrs=email_address_list, qs=qs, entity=entity_dict,
        date_bounds=(start_datetime, end_datetime), attachments_only=True)
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" % (attachment_query))
    attachments = _query_email_attachments(data_set_id, size, attachment_query)
    graph["attachments"] = attachments
    return graph
def get_topics_by_query(*args, **kwargs):
    """Cluster emails matching the current filters into dynamic topics."""
    tangelo.content_type("application/json")
    algorithm = kwargs.get('algorithm', 'lingo')
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    # TODO -------------------------------------------------------------------
    # TODO REMEMBER TO EVALUATE QUERY TERMS -- VERY IMPORTANT for good clustering!
    # TODO -------------------------------------------------------------------
    query_terms = ''  # TODO set from UI
    analysis_field = kwargs.get("analysis_field", "_source.body")  # TODO set from UI
    num_returned = 20
    clusters = get_dynamic_clusters(data_set_id, "emails",
                                    email_addrs=email_address_list,
                                    query_terms=query_terms,
                                    topic_score=None,
                                    entity={},
                                    date_bounds=(start_datetime, end_datetime),
                                    cluster_fields=[analysis_field],
                                    cluster_title_fields=["_source.subject"],
                                    algorithm=algorithm,
                                    max_doc_pool_size=500)
    return {"topics": clusters[:num_returned]}
def getRankedEmails(*args, **kwargs):
    """Return ranked email addresses for the selected data set and date range."""
    tangelo.content_type("application/json")
    tangelo.log("getRankedEmails(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    return get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)
def getCommunities(*args, **kwargs):
    """Return the top communities for the data set within the date bounds."""
    tangelo.log("getCommunities(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    #top_count = int(urllib.unquote(nth(args, 0, "40")))
    top_count = int(size)
    communities = get_top_communities(data_set_id,
                                      date_bounds=(start_datetime, end_datetime),
                                      num_communities=top_count)[:top_count]
    return {"communities": communities}
def getAttachCount(*args, **kwargs):
    """Return weekly attachment-activity histograms for the data set or accounts."""
    tangelo.content_type("application/json")
    tangelo.log("getAttachCount(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attach_type")
    attach_type = 'all'  # hack for now
    email_address_list = parseParamEmailAddress(**kwargs)

    def record(account_id, activities):
        # One account_activity_list entry.
        return {
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activities
        }

    if not email_address_list:
        activity = get_total_attachment_activity(
            data_set_id, data_set_id,
            query_function=attachment_histogram,
            sender_email_addr="",
            start=start_datetime, end=end_datetime, interval="week")
        return {"account_activity_list": [record(data_set_id, activity)]}
    return {"account_activity_list": [
        record(account_id,
               get_emailer_attachment_activity(data_set_id, account_id,
                                               (start_datetime, end_datetime),
                                               interval="week"))
        for account_id in email_address_list]}
def exif_emails(*args, **kwargs):
    """Return emails carrying EXIF geo data for the selected data set."""
    tangelo.log("geo.exif_emails(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # NOTE(review): qs and the date bounds are parsed but never passed to
    # es_get_exif_emails - confirm whether filtering was intended.
    qs = parseParamTextQuery(**kwargs)
    return es_get_exif_emails(data_set_id, size)
def getAccountActivity(*args, **kwargs):
    """Return weekly email-activity histograms for the data set or given accounts."""
    tangelo.content_type("application/json")
    tangelo.log("getAccountActivity(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    account_type = urllib.unquote(nth(args, 0, ''))
    if not account_type:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing account_type")
    email_address_list = parseParamEmailAddress(**kwargs)
    bounds = (start_datetime, end_datetime)
    if not email_address_list:
        records = [{
            "account_id": data_set_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": get_email_activity(data_set_id, data_set_id,
                                             date_bounds=bounds, interval="week")
        }]
    else:
        records = [{
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": get_email_activity(data_set_id, data_set_id, account_id,
                                             date_bounds=bounds, interval="week")
        } for account_id in email_address_list]
    return {"account_activity_list": records}
def getAllAttachmentBySender(*args, **kwargs):
    """Return all attachments sent by the sender named in the first path segment."""
    tangelo.log("getAttachmentsSender(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    sender = nth(args, 0, '')
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing data_set_id")
    if not sender:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing sender")
    tangelo.content_type("application/json")
    return get_attachments_by_sender(data_set_id, sender,
                                     start_datetime, end_datetime, size)
def exportStarred(*args, **kwargs):
    """Export an archive containing every starred email in the data set."""
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # TODO set from UI
    query_terms = ''
    email_address_list = []
    query = _build_email_query(email_addrs=email_address_list, qs=query_terms,
                               date_bounds=(start_datetime, end_datetime),
                               starred=True)
    tangelo.log("email.exportStarred(query: %s)" % (query))
    results = _query_emails(data_set_id, size, query)
    email_ids = [hit["num"] for hit in results["hits"]]
    return export_emails_archive(data_set_id, email_ids)
def setStarred(*args, **kwargs):
    """Set or clear the starred flag on the email named by the last path segment."""
    tangelo.log("setStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # Guard: args[-1] raised IndexError on an empty path instead of the
    # intended 400 response.
    email_id = args[-1] if args else ''
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email_id")
    starred = parseParamStarred(**kwargs)
    return set_starred(data_set_id, [email_id], starred)
def getEmail(*args, **kwargs):
    """Fetch a single email by id, highlighting any query terms."""
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    qs = parseParamTextQuery(**kwargs)
    # Guard: args[-1] raised IndexError on an empty path instead of the
    # intended 400 response.
    email_id = args[-1] if args else ''
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email_id")
    return get_email(data_set_id, email_id, qs)
def search(*path_args, **param_args):
    """Dispatch a search request on the first path segment.

    Supported fields: "text"/"all" (full-text), "email" (by address);
    "topic" and "community" are placeholders. Unknown or empty paths return
    an empty graph instead of raising.
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search(path_args[%s] %s)" % (len(path_args), str(path_args)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**param_args)
    # TODO this needs to come from UI
    size = size if size > 500 else 2500
    # TODO make sure that the qs param is put on the query
    qs = parseParamTextQuery(**param_args)

    empty_result = {"graph": {"nodes": [], "links": []}, "rows": []}
    # Guard: an empty path previously raised IndexError on path_args[0];
    # fall through to the empty result instead.
    field = nth(path_args, 0, '')

    if field in ("text", "all"):
        if len(path_args) >= 2:
            # TODO remove hacky path_args - should come from params
            qs = urllib.unquote(nth(path_args, 1, ''))
            return get_top_email_by_text_query(data_set_id, qs,
                                               start_datetime, end_datetime, size)
        return empty_result
    if field == "email":
        if len(path_args) >= 2:
            # TODO remove hacky path_args - should come from params
            email_address = urllib.unquote(nth(path_args, 1, ''))
            return es_get_all_email_by_address(data_set_id, email_address, qs,
                                               start_datetime, end_datetime, size)
        return empty_result
    # TODO implement search by topic and by community; both currently return
    # the empty result, as does any unrecognized field.
    return empty_result
def getDomains(*args, **kwargs):
    """Return the top email domains for the data set within the date bounds."""
    tangelo.log("getDomains(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    #top_count = int(urllib.unquote(nth(args, 0, "40")))
    top_count = int(size)
    domains = get_top_domains(data_set_id,
                              date_bounds=(start_datetime, end_datetime),
                              num_domains=top_count)[:top_count]
    return {"domains": domains}
def getEmail(*args, **kwargs):
    """Fetch a single email by id, highlighting any query terms."""
    tangelo.log("getEmail(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    qs = parseParamTextQuery(**kwargs)
    # Guard: args[-1] raised IndexError on an empty path instead of the
    # intended 400 response.
    email_id = args[-1] if args else ''
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email_id")
    return get_email(data_set_id, email_id, qs)
def setStarred(*args, **kwargs):
    """Set or clear the starred flag on the email named by the last path segment."""
    tangelo.log("setStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # Guard: args[-1] raised IndexError on an empty path instead of the
    # intended 400 response.
    email_id = args[-1] if args else ''
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email_id")
    starred = parseParamStarred(**kwargs)
    return set_starred(data_set_id, [email_id], starred)
def exportStarred(*args, **kwargs):
    """Export an archive containing every starred email in the data set."""
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    # TODO set from UI
    starred_query = _build_email_query(email_addrs=[], qs='',
                                       date_bounds=(start_datetime, end_datetime),
                                       starred=True)
    tangelo.log("email.exportStarred(query: %s)" % (starred_query))
    results = _query_emails(data_set_id, size, starred_query)
    email_ids = [hit["num"] for hit in results["hits"]]
    return export_emails_archive(data_set_id, email_ids)
def getAllAttachmentBySender(*args, **kwargs):
    """Return all attachments sent by the sender named in the first path segment."""
    tangelo.log("getAttachmentsSender(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    sender = nth(args, 0, '')
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing data_set_id")
    if not sender:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing sender")
    tangelo.content_type("application/json")
    return get_attachments_by_sender(data_set_id, sender,
                                     start_datetime, end_datetime, size)
def search_email_by_address_set(*path_args, **param_args):
    """Return emails exchanged between a sender set and a recipient set."""
    tangelo.content_type("application/json")
    tangelo.log("search.search_email_by_address_set(path_args[%s] %s)" % (len(path_args), str(path_args)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**param_args)
    # TODO: set from UI
    size = param_args.get('size', 2500)
    # parse the sender address and the recipient address
    sender_address_list, recipient_address_list = parseParamAllSenderAllRecipient(**param_args)
    return es_get_all_email_by_address_set(data_set_id,
                                           sender_address_list,
                                           recipient_address_list,
                                           start_datetime, end_datetime, size)
def search_email_by_conversation_forward_backward(*path_args, **param_args):
    """Page a conversation forward ('next') or backward ('prev') in time."""
    tangelo.content_type("application/json")
    # Fixed copy/paste: the log tag previously named search_email_by_address_set.
    tangelo.log("search.search_email_by_conversation_forward_backward(path_args[%s] %s)" % (len(path_args), str(path_args)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**param_args)
    # TODO: set from UI
    size = param_args.get('size', 2500)
    order = param_args.get('order', 'next')
    # 'prev' pages backward (descending); anything else pages forward.
    order = 'desc' if order == 'prev' else 'asc'
    # parse the sender address and the recipient address
    sender_address_list, recipient_address_list = parseParamAllSenderAllRecipient(**param_args)
    return es_get_all_email_by_conversation_forward_backward(
        data_set_id, sender_address_list, recipient_address_list,
        start_datetime, end_datetime, size, order)
def get_attachment_by_id(*args, **kwargs):
    """Stream a decoded attachment, setting Content-Type/Disposition headers.

    The attachment id comes from the first path segment or, failing that,
    from the GUID request parameter. Returns 400 on missing index, missing
    id, or no matching document.
    """
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    attachment_id = nth(args, 0, '')
    if not attachment_id:
        attachment_id = parseParamAttachmentGUID(**kwargs)
    cherrypy.log("email.get_attachments_sender(index=%s, attachment_id=%s)" % (data_set_id, attachment_id))
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing index")
    if not attachment_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attachment_id")
    attachment = es().get(index=data_set_id, doc_type="attachments", id=attachment_id)
    if not attachment:
        return tangelo.HTTPStatusCode(400, "no attachments found for (index=%s, attachment_id=%s)" % (data_set_id, attachment_id))
    attachment = attachment["_source"]
    ext = attachment["extension"]  # unused, kept so a missing key still fails loudly
    filename = attachment["filename"]
    mime_type = mimetypes.guess_type(filename)[0]
    if not mime_type:
        # Unknown type: force a download with the original filename.
        tangelo.content_type("application/x-download")
        header("Content-Disposition", 'attachment; filename="{}"'.format(filename))
    else:
        tangelo.content_type(mime_type)
        header("Content-Disposition", 'inline; filename="{}"'.format(filename))
    content = attachment["contents64"]
    # Renamed from `bytes`, which shadowed the builtin type.
    decoded = base64.b64decode(content)
    # dump(decoded, filename)
    as_str = str(decoded)
    tangelo.log(str(len(as_str)), "Uploading Attachment - length = ")
    return as_str
def get_top_entities(*args, **kwargs):
    """Return the top entities as [index, type, key, doc_count] rows."""
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    top_count = int(urllib.unquote(nth(args, 0, "20")))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)
    # TODO qs not being evaluated in inner filter called by this method
    if email_address_list:
        entities = get_entity_histogram(
            data_set_id, "emails", email_address_list, qs=qs,
            date_bounds=(start_datetime, end_datetime))[:top_count]
    else:
        entities = get_entity_histogram(
            data_set_id, "emails", qs=qs,
            date_bounds=(start_datetime, end_datetime))[:top_count]
    return {"entities": [[str(i), ent["type"], ent["key"], ent["doc_count"]]
                         for i, ent in enumerate(entities)]}
def getAttachCount(*args, **kwargs):
    """Return weekly attachment-activity histograms for the data set or accounts."""
    tangelo.content_type("application/json")
    tangelo.log("getAttachCount(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attach_type")
    attach_type = 'all'  # hack for now
    email_address_list = parseParamEmailAddress(**kwargs)
    if not email_address_list:
        activity = get_total_attachment_activity(
            data_set_id, data_set_id,
            query_function=attachment_histogram,
            sender_email_addr="",
            start=start_datetime, end=end_datetime, interval="week")
        result = {"account_activity_list": [{
            "account_id": data_set_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activity
        }]}
    else:
        result = {"account_activity_list": [{
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": get_emailer_attachment_activity(
                data_set_id, account_id,
                (start_datetime, end_datetime), interval="week")
        } for account_id in email_address_list]}
    return result
def listAllDataSet():
    """List all known data sets plus the top-ranked email addresses of the default set."""
    tangelo.log("datasource.listAllDataSet()")
    # Ignore index keys in ES that are not in the newman_app.conf
    # Find all the indexes that begin with the index loader prefix
    indexes = [_index_record(index)
               for index in index_list()
               if index in data_set_names() or index.startswith(index_creator_prefix())]
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**{})
    ranked = get_ranked_email_address_from_email_addrs_index(
        data_set_id, start_datetime, end_datetime, size)["emails"]
    # Key each ranked tuple by its address (element 0).
    email_addrs = {addr[0]: addr for addr in ranked}
    return {
        "data_set_selected": getDefaultDataSetID(),
        "data_sets": indexes,
        "top_hits": {
            "order_by": "rank",
            "email_addrs": email_addrs
        }
    }
def get_top_entities(*args, **kwargs):
    """Return the top entities as [index, type, key, doc_count] rows."""
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    top_count = int(urllib.unquote(nth(args, 0, "20")))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)
    date_bounds = (start_datetime, end_datetime)
    # TODO qs not being evaluated in inner filter called by this method
    if not email_address_list:
        histogram = get_entity_histogram(data_set_id, "emails", qs=qs,
                                         date_bounds=date_bounds)[:top_count]
    else:
        histogram = get_entity_histogram(data_set_id, "emails",
                                         email_address_list, qs=qs,
                                         date_bounds=date_bounds)[:top_count]
    rows = []
    for i, entity in enumerate(histogram):
        rows.append([str(i), entity["type"], entity["key"], entity["doc_count"]])
    return {"entities": rows}
def getAccountActivity(*args, **kwargs):
    """Return weekly email-activity histograms for the data set or given accounts."""
    tangelo.content_type("application/json")
    tangelo.log("getAccountActivity(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    account_type = urllib.unquote(nth(args, 0, ''))
    if not account_type:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing account_type")
    email_address_list = parseParamEmailAddress(**kwargs)
    if not email_address_list:
        result = {"account_activity_list": [{
            "account_id": data_set_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": get_email_activity(
                data_set_id, data_set_id,
                date_bounds=(start_datetime, end_datetime), interval="week")
        }]}
    else:
        result = {"account_activity_list": [{
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": get_email_activity(
                data_set_id, data_set_id, account_id,
                date_bounds=(start_datetime, end_datetime), interval="week")
        } for account_id in email_address_list]}
    return result
def getAttachFileType(*args, **kwargs):
    """Return the top attachment file types for the data set.

    The first path segment is an attachment type (defaults to 'all'); it is
    currently unused by the query.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachFileType(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    top_count = int(size)
    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        attach_type = 'all'  # hack for now
    email_address_list = parseParamEmailAddress(**kwargs)
    # Both branches of the original were byte-identical, so they are
    # collapsed into one call.
    # TODO: implement populating the attachment file-types under individual
    # email-accounts (email_address_list is currently ignored).
    file_types = get_top_attachment_types(
        data_set_id,
        date_bounds=(start_datetime, end_datetime),
        num_top_attachments=top_count)[:top_count]
    return {
        "account_id": data_set_id,
        "data_set_id": data_set_id,
        "account_start_datetime": start_datetime,
        "account_end_datetime": end_datetime,
        "types": file_types
    }
def getRankedEmails(*args, **kwargs):
    """Return ranked email addresses for the selected data set and date range."""
    tangelo.content_type("application/json")
    tangelo.log("getRankedEmails(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    return get_ranked_email_address_from_email_addrs_index(data_set_id,
                                                           start_datetime,
                                                           end_datetime,
                                                           size)
def exportMany(*args, **kwargs):
    """Export an archive of the emails whose ids are listed in the request."""
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_ids = parseParamEmailIds(**kwargs)
    return export_emails_archive(data_set_id, email_ids)
def exportMany(*args, **kwargs):
    """Export an archive of the emails whose ids are listed in the request."""
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    requested_ids = parseParamEmailIds(**kwargs)
    return export_emails_archive(data_set_id, requested_ids)