Beispiel #1
0
def searchStarred(*args, **kwargs):
    """Return the graph and attachment listing for the starred-email query.

    The dataset id, date range and size are read from ``kwargs`` through
    ``parseParamDatetime``.  A size that is not greater than 500 is replaced
    by 2500.  The response content type is set to JSON.

    Returns whatever ``_build_graph_for_emails`` produces, with an extra
    ``"attachments"`` entry filled from ``_query_email_attachments``.
    """
    tangelo.log("email.searchStarred(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    if not size > 500:
        size = 2500

    # TODO set from UI
    # Filters shared by the email query and the attachment query.
    shared_filters = dict(
        email_addrs=[],
        qs='',
        date_bounds=(start_datetime, end_datetime),
        starred=True,
    )

    email_query = _build_email_query(**shared_filters)
    tangelo.log("email.searchStarred(query: %s)" % email_query)

    found = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Same filters again, this time restricted with attachments_only=True.
    attachment_query = _build_email_query(attachments_only=True, **shared_filters)
    tangelo.log("email.searchStarred(attachment-query: %s)" % attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, size, attachment_query)

    return graph
Beispiel #2
0
def es_get_all_email_by_community(data_set_id, community, email_addrs, qs,
                                  start_datetime, end_datetime, size):
    """Return the email graph, with attachments, for a single community.

    Both queries filter on ``email_addrs``, the date range and
    ``communities=[community]``.  ``qs`` is accepted but not used here: an
    empty query string is always handed to ``_build_email_query``.

    Returns the value of ``_build_graph_for_emails`` with an added
    ``"attachments"`` entry taken from ``_query_email_attachments``.
    """
    tangelo.log(
        "es_search.es_get_all_email_by_community(community=%s, email_addrs=%s)"
        % (str(community), str(email_addrs)))

    def build_query(**extra):
        # A new one-element community list is created for every query.
        return _build_email_query(
            email_addrs=email_addrs,
            qs='',
            date_bounds=(start_datetime, end_datetime),
            communities=[community],
            **extra)

    email_query = build_query()
    tangelo.log("es_search.es_get_all_email_by_community(query: %s)" % email_query)

    found = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Attachments matching the same community filters.
    attachment_query = build_query(attachments_only=True)
    tangelo.log(
        "es_search.es_get_all_email_by_community(attachment-query: %s)" %
        attachment_query)
    graph["attachments"] = _query_email_attachments(
        data_set_id, size, attachment_query)

    return graph
Beispiel #3
0
def get_graph_for_entity(*args, **kwargs):
    """Return the email graph, with attachments, for an entity search request.

    All inputs are parsed from ``kwargs``: dataset id, date range and size
    (``parseParamDatetime``), email addresses (``parseParamEmailAddress``),
    the entity filter (``parseParamEntity``) and the text query
    (``parseParamTextQuery``).  A size that is not greater than 500 is
    replaced by 2500.  The response content type is set to JSON.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    addresses = parseParamEmailAddress(**kwargs)
    entities = parseParamEntity(**kwargs)
    # TODO set from UI
    if not size > 500:
        size = 2500

    text_query = parseParamTextQuery(**kwargs)

    # Criteria shared by the email query and the attachment query.
    criteria = {
        "email_addrs": addresses,
        "qs": text_query,
        "entity": entities,
        "date_bounds": (start_datetime, end_datetime),
    }

    email_query = _build_email_query(**criteria)
    tangelo.log("entity.get_graph_for_entity(query: %s)" % email_query)

    found = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Attachments for the same entity criteria.
    attachment_query = _build_email_query(attachments_only=True, **criteria)
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" %
                attachment_query)
    graph["attachments"] = _query_email_attachments(
        data_set_id, size, attachment_query)

    return graph
Beispiel #4
0
def searchStarred(*args, **kwargs):
    """Build the graph response for the starred-email query.

    ``kwargs`` is parsed by ``parseParamDatetime`` into dataset id, date
    bounds and size; the effective size is the parsed size when it exceeds
    500 and 2500 otherwise.  The email query and the attachment query both
    pass ``starred=True`` with an empty address list and empty query string.
    """
    tangelo.log("email.searchStarred(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    tangelo.content_type("application/json")

    parsed = parseParamDatetime(**kwargs)
    data_set_id, start_datetime, end_datetime, size = parsed

    if size > 500:
        limit = size
    else:
        limit = 2500

    # TODO set from UI
    no_terms = ''
    no_addresses = []
    date_range = (start_datetime, end_datetime)

    starred_query = _build_email_query(email_addrs=no_addresses,
                                       qs=no_terms,
                                       date_bounds=date_range,
                                       starred=True)
    tangelo.log("email.searchStarred(query: %s)" % starred_query)

    hits = _query_emails(data_set_id, limit, starred_query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Starred emails again, limited to those with attachments.
    starred_attachment_query = _build_email_query(email_addrs=no_addresses,
                                                  qs=no_terms,
                                                  date_bounds=date_range,
                                                  attachments_only=True,
                                                  starred=True)
    tangelo.log("email.searchStarred(attachment-query: %s)" %
                starred_attachment_query)
    graph["attachments"] = _query_email_attachments(
        data_set_id, limit, starred_attachment_query)

    return graph
Beispiel #5
0
def es_get_email_by_phone_numbers(data_set_id,
                                  qs='',
                                  date_bounds=('1970-01-01', 'now'),
                                  phone_numbers=None,
                                  size=20):
    """Return the email graph, with attachments, for a phone-number filter.

    Args:
        data_set_id: dataset identifier handed to the query helpers.
        qs: query string forwarded to ``_build_email_query``.
        date_bounds: pair of date bounds forwarded to ``_build_email_query``.
        phone_numbers: phone numbers to filter on; ``None`` (the default)
            is treated as an empty list.
        size: size argument forwarded to both query helpers.

    Returns:
        The value of ``_build_graph_for_emails`` with an added
        ``"attachments"`` entry from ``_query_email_attachments``.
    """
    # A fresh list per call: a literal [] default would be one shared object
    # across every call that omits the argument.
    if phone_numbers is None:
        phone_numbers = []

    tangelo.log("es_phone_numbers.es_get_all_email_by_phone_number(%s)" %
                (str(phone_numbers)))

    query = _build_email_query(qs=qs,
                               phone_numbers=phone_numbers,
                               date_bounds=date_bounds)
    tangelo.log(
        "es_phone_numbers.es_get_all_email_by_phone_number(query: %s)" %
        (query))

    results = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, results["hits"],
                                    results["total"])

    # Attachments for the same phone-number filter.
    query = _build_email_query(qs=qs,
                               phone_numbers=phone_numbers,
                               date_bounds=date_bounds,
                               attachments_only=True)
    tangelo.log(
        "es_phone_numbers.es_get_all_email_by_phone_number(attachment-query: %s)"
        % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)
    return graph
Beispiel #6
0
def get_graph_for_entity(*args, **kwargs):
    """Build the graph response for an entity search.

    Request parameters come from ``kwargs`` via the ``parseParam*`` helpers
    (date range and size, email addresses, entity filter, text query).  The
    effective size is the parsed size when it exceeds 500, else 2500.

    Returns the graph from ``_build_graph_for_emails`` with an
    ``"attachments"`` entry added from ``_query_email_attachments``.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_graph_for_entity(args: %s kwargs: %s)" % (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)
    entity_dict = parseParamEntity(**kwargs)
    # TODO set from UI
    if size > 500:
        limit = size
    else:
        limit = 2500

    qs = parseParamTextQuery(**kwargs)
    date_range = (start_datetime, end_datetime)

    entity_query = _build_email_query(
        email_addrs=email_address_list,
        qs=qs,
        entity=entity_dict,
        date_bounds=date_range,
    )
    tangelo.log("entity.get_graph_for_entity(query: %s)" % entity_query)

    hits = _query_emails(data_set_id, limit, entity_query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Same entity filters, restricted to emails with attachments.
    entity_attachment_query = _build_email_query(
        email_addrs=email_address_list,
        qs=qs,
        entity=entity_dict,
        date_bounds=date_range,
        attachments_only=True,
    )
    tangelo.log("entity.get_graph_by_entity(attachment-query: %s)" % entity_attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, limit, entity_attachment_query)

    return graph
Beispiel #7
0
def get_top_email_by_text_query(data_set_id, qs, start_datetime, end_datetime,
                                size):
    """Return the email graph, with attachments, for a text query.

    When ``qs`` is falsy no query is run and a ``tangelo.HTTPStatusCode``
    carrying status 400 is returned instead.  Otherwise the result is the
    value of ``_build_graph_for_emails`` with an added ``"attachments"``
    entry from ``_query_email_attachments``.
    """
    if not qs:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing search term(s)")

    date_range = (start_datetime, end_datetime)

    text_query = _build_email_query(qs=qs, date_bounds=date_range)
    tangelo.log("es_search.get_graph_for_text_query(query: %s)" % text_query)

    found = _query_emails(data_set_id, size, text_query)
    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Attachments matching the same text query and date range.
    attachment_query = _build_email_query(qs=qs,
                                          date_bounds=date_range,
                                          attachments_only=True)
    tangelo.log("es_search.get_top_email_by_text_query(attachment-query: %s)" %
                attachment_query)
    graph["attachments"] = _query_email_attachments(
        data_set_id, size, attachment_query)

    return graph
Beispiel #8
0
def es_get_all_email_by_address(data_set_id, email_address, qs, start_datetime, end_datetime, size):
    """Return the email graph, with attachments, for one email address.

    Both queries filter on ``email_addrs=[email_address]``, ``qs`` and the
    date range.  The result is the value of ``_build_graph_for_emails`` with
    an added ``"attachments"`` entry from ``_query_email_attachments``.
    """
    tangelo.log("es_search.get_graph_for_email_address(%s)" % (str(email_address)))

    def address_query(**extra):
        # A new single-address list is created for each query.
        return _build_email_query(
            email_addrs=[email_address],
            qs=qs,
            date_bounds=(start_datetime, end_datetime),
            **extra)

    email_query = address_query()
    tangelo.log("es_search.get_graph_for_email_address(query: %s)" % email_query)

    found = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Attachments for the same address filter.
    attachment_query = address_query(attachments_only=True)
    tangelo.log("search.get_graph_by_entity(attachment-query: %s)" % attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, size, attachment_query)
    return graph
Beispiel #9
0
def es_get_email_by_phone_numbers(data_set_id, qs='', date_bounds=('1970-01-01', 'now'), phone_numbers=None, size=20):
    """Return the email graph, with attachments, for a phone-number filter.

    Args:
        data_set_id: dataset identifier handed to the query helpers.
        qs: query string forwarded to ``_build_email_query``.
        date_bounds: pair of date bounds forwarded to ``_build_email_query``.
        phone_numbers: phone numbers to filter on; ``None`` (the default)
            is treated as an empty list.
        size: size argument forwarded to both query helpers.

    Returns:
        The value of ``_build_graph_for_emails`` with an added
        ``"attachments"`` entry from ``_query_email_attachments``.
    """
    # Avoid a shared mutable default: build the empty list per call.
    phone_numbers = [] if phone_numbers is None else phone_numbers

    tangelo.log("es_phone_numbers.es_get_all_email_by_phone_number(%s)" % (str(phone_numbers)))

    query = _build_email_query(qs=qs, phone_numbers=phone_numbers, date_bounds=date_bounds)
    tangelo.log("es_phone_numbers.es_get_all_email_by_phone_number(query: %s)" % (query))

    results = _query_emails(data_set_id, size, query)
    graph = _build_graph_for_emails(data_set_id, results["hits"], results["total"])

    # Attachments for the same phone-number filter.
    query = _build_email_query(qs=qs, phone_numbers=phone_numbers, date_bounds=date_bounds, attachments_only=True)
    tangelo.log("es_phone_numbers.es_get_all_email_by_phone_number(attachment-query: %s)" % (query))
    graph["attachments"] = _query_email_attachments(data_set_id, size, query)
    return graph
Beispiel #10
0
def get_attachments_by_sender(data_set_id, sender, start_datetime, end_datetime, size):
    """List every attachment on the emails sent by ``sender`` in a date range.

    The search body is built with ``sender_addrs=[sender]`` and
    ``attachments_only=True`` and run against index ``data_set_id`` with
    ``doc_type="emails"``.

    Returns:
        ``{"sender": sender, "email_attachments": rows}`` where each row is a
        list of: email id, attachment guid, datetime, first sender (``""``
        when the email has none), ``;``-joined tos, ``;``-joined ccs,
        ``;``-joined bccs, subject, attachment filename, and a trailing
        constant ``0`` (its meaning is not evident from this function).
    """
    body = _build_email_query(sender_addrs=[sender], date_bounds=(start_datetime, end_datetime), attachments_only=True)
    tangelo.log("get_attachments_by_sender.Query %s"%body)

    attachments_resp = es().search(index=data_set_id, doc_type="emails", size=size, body=body)

    email_attachments = []
    for hit in attachments_resp["hits"]["hits"]:
        _source = hit["_source"]

        # Indexing [0] directly raised IndexError when "senders" was missing
        # or empty; fall back to an empty string like the other fields do.
        senders = _source.get("senders") or [""]

        email_id = _source["id"]
        sent_at = _source["datetime"]
        first_sender = senders[0]
        tos = ';'.join(_source.get("tos", ""))
        ccs = ';'.join(_source.get("ccs", ""))
        bccs = ';'.join(_source.get("bccs", ""))
        subject = _source.get("subject", "")

        # One row per attachment; the email-level columns repeat on each row.
        for attachment in _source["attachments"]:
            email_attachments.append([
                email_id,
                attachment["guid"],
                sent_at,
                first_sender,
                tos,
                ccs,
                bccs,
                subject,
                attachment["filename"],
                0,
            ])
    return {"sender":sender, "email_attachments":email_attachments}
Beispiel #11
0
def _cluster_carrot2(index,
                     type,
                     email_addrs=None,
                     query_terms='',
                     topic_score=None,
                     entity=None,
                     date_bounds=None,
                     cluster_fields=None,
                     cluster_title_fields=None,
                     algorithm="lingo",
                     max_doc_pool_size=500):
    """POST a clustering search to ``/<index>/<type>/_search_with_clusters``.

    Args:
        index: index name used in the request path.
        type: document type used in the request path (name kept for
            caller compatibility although it shadows the builtin).
        email_addrs: address filter for ``_build_email_query``; ``None``
            means an empty list.
        query_terms: query string; also sent as the ``query_hint``.
        topic_score: accepted but not used by this function.
        entity: entity filter for ``_build_email_query``; ``None`` means an
            empty dict.
        date_bounds: date bounds forwarded to ``_build_email_query``.
        cluster_fields: fields mapped to ``content``; defaults to
            ``["_source.body"]``.
        cluster_title_fields: fields mapped to ``title``; defaults to
            ``["_source.subject"]``.
        algorithm: value of the request's ``algorithm`` field.
        max_doc_pool_size: ``size`` of the embedded search request.

    Returns:
        The unmodified result of ``es().transport.perform_request``.
    """
    # Mutable defaults are created per call instead of being shared objects
    # stored on the function.
    if email_addrs is None:
        email_addrs = []
    if entity is None:
        entity = {}
    if cluster_fields is None:
        cluster_fields = ["_source.body"]
    if cluster_title_fields is None:
        cluster_title_fields = ["_source.subject"]

    query = _build_email_query(email_addrs=email_addrs,
                               qs=query_terms,
                               entity=entity,
                               date_bounds=date_bounds)
    carrot_query = {
        "search_request": {
            "query": query["query"],
            "size": max_doc_pool_size
        },
        "algorithm": algorithm,
        "max_hits": 0,
        "query_hint": query_terms,
        "field_mapping": {
            "title": cluster_title_fields,
            "content": cluster_fields
        }
    }

    # The response is returned as-is; the previously computed document total
    # was never used and has been dropped.
    return es().transport.perform_request(
        "POST",
        "/{}/{}/_search_with_clusters".format(index, type), {},
        body=carrot_query)
Beispiel #12
0
def es_get_all_email_by_topic(data_set_id, topic, email_addrs, qs, start_datetime, end_datetime, size):
    """Return the email graph, with attachments, for one topic.

    Both queries pass ``topic`` with ``sort_mode="topic"`` and
    ``sort_order="desc"``.  ``qs`` is accepted but not used: an empty query
    string is always passed.  The email query additionally requests the
    field ``"topic_scores.idx_<topic['idx']>"``.
    """
    tangelo.log("es_search.es_get_all_email_by_topic(email_addrs=%s, topic=%s)" % ( str(email_addrs), str(topic)))

    # Filters shared by the email query and the attachment query.
    topic_filters = dict(
        email_addrs=email_addrs,
        qs='',
        topic=topic,
        sort_mode="topic",
        sort_order="desc",
        date_bounds=(start_datetime, end_datetime),
    )

    topic_query = _build_email_query(**topic_filters)
    tangelo.log("es_search.es_get_all_email_by_topic(query: %s)" % topic_query)

    # Emails graph for the topic, including the topic's score field.
    score_field = "topic_scores.idx_" + str(topic["idx"])
    found = _query_emails(data_set_id, size, topic_query, additional_fields=[score_field])
    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Attachments for the same topic filters.
    attachment_query = _build_email_query(attachments_only=True, **topic_filters)
    tangelo.log("es_search.es_get_all_email_by_topic(attachment-query: %s)" % attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, size, attachment_query)
    return graph
Beispiel #13
0
def es_get_all_email_by_community(data_set_id, community, email_addrs, qs, start_datetime, end_datetime, size):
    """Build the email graph and attachment list for one community.

    The email and attachment queries share the address list, date range and
    ``communities=[community]``.  ``qs`` is part of the signature but is not
    forwarded; the query string sent to ``_build_email_query`` is ``''``.
    """
    tangelo.log("es_search.es_get_all_email_by_community(community=%s, email_addrs=%s)" % (str(community), str(email_addrs)))

    date_range = (start_datetime, end_datetime)

    community_query = _build_email_query(
        email_addrs=email_addrs,
        qs='',
        date_bounds=date_range,
        communities=[community],
    )
    tangelo.log("es_search.es_get_all_email_by_community(query: %s)" % community_query)

    hits = _query_emails(data_set_id, size, community_query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Same community filters, restricted to emails with attachments.
    community_attachment_query = _build_email_query(
        email_addrs=email_addrs,
        qs='',
        date_bounds=date_range,
        communities=[community],
        attachments_only=True,
    )
    tangelo.log("es_search.es_get_all_email_by_community(attachment-query: %s)" % community_attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, size, community_attachment_query)

    return graph
Beispiel #14
0
def es_get_all_email_by_conversation_forward_backward(data_set_id,
                                                      sender,
                                                      recipients,
                                                      start_datetime,
                                                      end_datetime,
                                                      size,
                                                      sort_order="asc"):
    """Return the conversation graph between ``sender`` and ``recipients``.

    Both queries use ``address_filter_mode="conversation"``,
    ``date_mode_inclusive=False`` and the caller's ``sort_order``.  The
    result is the value of ``_build_graph_for_emails`` with an added
    ``"attachments"`` entry from ``_query_email_attachments``.
    """
    tangelo.log(
        "es_search.es_get_all_email_by_conversation_forward_backward(sender=%s, recipients=%s)"
        % (str(sender), str(recipients)))

    def conversation_query(**extra):
        # apply query with address intersection behaviour
        return _build_email_query(
            sender_addrs=[sender],
            recipient_addrs=recipients,
            qs='',
            date_bounds=(start_datetime, end_datetime),
            sort_order=sort_order,
            date_mode_inclusive=False,
            address_filter_mode="conversation",
            **extra)

    email_query = conversation_query()
    tangelo.log(
        "es_search.es_get_all_email_by_conversation_forward_backward(query: %s)"
        % email_query)

    found = _query_emails(data_set_id, size, email_query)
    # If you do not want to generate a graph each time this is called use this code
    # return {"graph":{"nodes":[], "links":[]}, "rows": [_map_emails_to_row(email) for email in results["hits"]], "query_hits" : results["total"]}

    graph = _build_graph_for_emails(data_set_id, found["hits"], found["total"])

    # Attachments for the same conversation filters.
    attachment_query = conversation_query(attachments_only=True)
    tangelo.log(
        "es_search.es_get_all_email_by_conversation_forward_backward(attachment-query: %s)"
        % attachment_query)
    graph["attachments"] = _query_email_attachments(
        data_set_id, size, attachment_query)
    return graph
Beispiel #15
0
def get_top_email_by_text_query(data_set_id, qs, start_datetime, end_datetime, size):
    """Build the email graph and attachment list for a text search.

    A falsy ``qs`` short-circuits with a ``tangelo.HTTPStatusCode`` for
    status 400.  Otherwise the graph from ``_build_graph_for_emails`` is
    returned with its ``"attachments"`` entry set from
    ``_query_email_attachments``.
    """
    if not qs:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing search term(s)")

    def text_query(**extra):
        return _build_email_query(
            qs=qs,
            date_bounds=(start_datetime, end_datetime),
            **extra)

    email_query = text_query()
    tangelo.log("es_search.get_graph_for_text_query(query: %s)" % email_query)

    hits = _query_emails(data_set_id, size, email_query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Same text query, restricted to emails with attachments.
    attachment_query = text_query(attachments_only=True)
    tangelo.log("es_search.get_top_email_by_text_query(attachment-query: %s)" % attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, size, attachment_query)

    return graph
Beispiel #16
0
def es_get_all_email_by_conversation_forward_backward(data_set_id, sender, recipients, start_datetime, end_datetime, size, sort_order="asc"):
    """Build the conversation graph and attachment list for a sender/recipient set.

    The email and attachment queries are built with the same arguments
    (conversation address filter mode, non-inclusive date mode, caller's
    ``sort_order``); the attachment query adds ``attachments_only=True``.
    """
    tangelo.log("es_search.es_get_all_email_by_conversation_forward_backward(sender=%s, recipients=%s)" % (str(sender),str(recipients)))

    date_range = (start_datetime, end_datetime)

    # apply query with address intersection behaviour
    conversation_query = _build_email_query(
        sender_addrs=[sender],
        recipient_addrs=recipients,
        qs='',
        date_bounds=date_range,
        sort_order=sort_order,
        date_mode_inclusive=False,
        address_filter_mode="conversation",
    )
    tangelo.log("es_search.es_get_all_email_by_conversation_forward_backward(query: %s)" % conversation_query)

    hits = _query_emails(data_set_id, size, conversation_query)
    # If you do not want to generate a graph each time this is called use this code
    # return {"graph":{"nodes":[], "links":[]}, "rows": [_map_emails_to_row(email) for email in results["hits"]], "query_hits" : results["total"]}

    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Same conversation filters, restricted to emails with attachments.
    conversation_attachment_query = _build_email_query(
        sender_addrs=[sender],
        recipient_addrs=recipients,
        qs='',
        date_bounds=date_range,
        sort_order=sort_order,
        date_mode_inclusive=False,
        address_filter_mode="conversation",
        attachments_only=True,
    )
    tangelo.log("es_search.es_get_all_email_by_conversation_forward_backward(attachment-query: %s)" % conversation_attachment_query)
    graph["attachments"] = _query_email_attachments(data_set_id, size, conversation_attachment_query)
    return graph
Beispiel #17
0
def es_get_conversation(data_set_id, sender, recipients, start_datetime, end_datetime, size, document_uid, current_datetime):
    """Return the conversation graph around one email, split at ``current_datetime``.

    Two email queries are run with ``address_filter_mode="conversation"``:
    an "after" query over ``(current_datetime, end_datetime)`` sorted
    ascending with ``date_mode_inclusive=True``, and a "before" query over
    ``(start_datetime, current_datetime)`` sorted descending with
    ``date_mode_inclusive=False``.  The before-hits are reversed and placed
    ahead of the after-hits.  The same pair of queries is repeated with
    ``attachments_only=True``.

    Returns:
        The graph from ``_build_graph_for_emails`` with three extra entries:
        ``"current_index"`` (number of before-emails, i.e. the position of
        the first after-email), ``"attachments"`` (before + after
        attachments) and ``"attachments_index"`` (position of the first
        attachment whose first element equals ``document_uid``, or ``-1``).
    """
    tangelo.log("es_search.es_get_conversation(senders=%s, recipients=%s)" % (str(sender),str(recipients)))
    #start_datetime = default_min_timeline_bound()

    def conversation_query(date_bounds, sort_order, inclusive, **extra):
        # apply query with address intersection behavior
        return _build_email_query(
            sender_addrs=[sender],
            recipient_addrs=recipients,
            qs='',
            date_bounds=date_bounds,
            sort_order=sort_order,
            date_mode_inclusive=inclusive,
            address_filter_mode="conversation",
            **extra)

    after_bounds = (current_datetime, end_datetime)
    before_bounds = (start_datetime, current_datetime)

    # The original passed the misspelled sort order 'acs' here; the matching
    # attachment query below already used 'asc'.
    query = conversation_query(after_bounds, 'asc', True)
    tangelo.log("es_search.es_get_conversation(query-after: %s)" % (query))
    emails_asc = _query_emails(data_set_id, size, query)

    query = conversation_query(before_bounds, 'desc', False)
    tangelo.log("es_search.es_get_conversation(query-before: %s)" % (query))
    emails_desc = _query_emails(data_set_id, size, query)
    total = emails_asc["total"] + emails_desc["total"]

    # Before-emails arrive newest first; flip them so the merged list runs
    # oldest to newest.
    emails_desc = emails_desc['hits']
    emails_desc.reverse()
    current_index = len(emails_desc)
    emails = emails_desc + emails_asc['hits']

    graph = _build_graph_for_emails(data_set_id, emails, total)
    graph['current_index'] = current_index

    # Attachments on either side of the current email.
    query = conversation_query(after_bounds, 'asc', True, attachments_only=True)
    tangelo.log("es_search.es_get_conversation(attachment-query-after: %s)" % (query))
    attachments_asc = _query_email_attachments(data_set_id, size, query)

    query = conversation_query(before_bounds, 'desc', False, attachments_only=True)
    # This is the before-range query; it was previously logged as "after".
    tangelo.log("es_search.es_get_conversation(attachment-query-before: %s)" % (query))
    attachments_desc = _query_email_attachments(data_set_id, size, query)
    attachments_desc.reverse()

    # Find the first index in the attachment array where the current emails attachments start or -1
    graph["attachments"] = attachments_desc + attachments_asc
    try:
        graph["attachments_index"] = [attach[0] for attach in graph["attachments"]].index(document_uid)
    except ValueError:
        graph["attachments_index"] = -1

    return graph
Beispiel #18
0
def exportStarred(*args, **kwargs):
    """Export an archive of the emails matched by the starred query.

    Dataset id, date range and size come from ``parseParamDatetime``.  The
    ``"num"`` value of every hit is collected and handed, together with the
    dataset id, to ``export_emails_archive``, whose result is returned.
    """
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    # TODO set from UI
    starred_query = _build_email_query(
        email_addrs=[],
        qs='',
        date_bounds=(start_datetime, end_datetime),
        starred=True,
    )
    tangelo.log("email.exportStarred(query: %s)" % starred_query)

    found = _query_emails(data_set_id, size, starred_query)

    email_ids = []
    for hit in found["hits"]:
        email_ids.append(hit["num"])

    return export_emails_archive(data_set_id, email_ids)
Beispiel #19
0
def es_get_all_email_by_address(data_set_id, email_address, qs, start_datetime,
                                end_datetime, size):
    """Build the email graph and attachment list for a single address.

    The email and attachment queries both use a one-element address list,
    the caller's ``qs`` and the date range; the attachment query adds
    ``attachments_only=True``.  Returns the graph from
    ``_build_graph_for_emails`` with its ``"attachments"`` entry set.
    """
    tangelo.log("es_search.get_graph_for_email_address(%s)" %
                (str(email_address)))

    date_range = (start_datetime, end_datetime)

    address_query = _build_email_query(email_addrs=[email_address],
                                       qs=qs,
                                       date_bounds=date_range)
    tangelo.log("es_search.get_graph_for_email_address(query: %s)" %
                address_query)

    hits = _query_emails(data_set_id, size, address_query)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Same address filter, restricted to emails with attachments.
    address_attachment_query = _build_email_query(email_addrs=[email_address],
                                                  qs=qs,
                                                  date_bounds=date_range,
                                                  attachments_only=True)
    tangelo.log("search.get_graph_by_entity(attachment-query: %s)" %
                address_attachment_query)
    graph["attachments"] = _query_email_attachments(
        data_set_id, size, address_attachment_query)
    return graph
Beispiel #20
0
def es_get_all_email_by_topic(data_set_id, topic, email_addrs, qs,
                              start_datetime, end_datetime, size):
    """Build the email graph and attachment list for a topic.

    The email and attachment queries are built with the same topic, address
    list and date range, using ``sort_mode="topic"`` and
    ``sort_order="desc"``.  ``qs`` is part of the signature but the query
    string actually sent is ``''``.  The email query also asks for the
    additional field ``"topic_scores.idx_"`` followed by ``topic["idx"]``.
    """
    tangelo.log(
        "es_search.es_get_all_email_by_topic(email_addrs=%s, topic=%s)" %
        (str(email_addrs), str(topic)))

    date_range = (start_datetime, end_datetime)

    def topic_query(**extra):
        return _build_email_query(email_addrs=email_addrs,
                                  qs='',
                                  topic=topic,
                                  sort_mode="topic",
                                  sort_order="desc",
                                  date_bounds=date_range,
                                  **extra)

    email_query = topic_query()
    tangelo.log("es_search.es_get_all_email_by_topic(query: %s)" % email_query)

    # Get emails graph for topics
    extra_fields = ["topic_scores.idx_" + str(topic["idx"])]
    hits = _query_emails(data_set_id,
                         size,
                         email_query,
                         additional_fields=extra_fields)
    graph = _build_graph_for_emails(data_set_id, hits["hits"], hits["total"])

    # Get attachments for top score topic
    attachment_query = topic_query(attachments_only=True)
    tangelo.log("es_search.es_get_all_email_by_topic(attachment-query: %s)" %
                attachment_query)
    graph["attachments"] = _query_email_attachments(
        data_set_id, size, attachment_query)
    return graph
Beispiel #21
0
def exportStarred(*args, **kwargs):
    """Return the export archive for the emails matched by the starred query.

    ``kwargs`` is parsed by ``parseParamDatetime``.  The query is built with
    an empty address list, an empty query string, the parsed date range and
    ``starred=True``; each hit's ``"num"`` is passed on to
    ``export_emails_archive``.
    """
    parsed = parseParamDatetime(**kwargs)
    data_set_id, start_datetime, end_datetime, size = parsed

    # TODO set from UI
    no_terms = ''
    no_addresses = []
    date_range = (start_datetime, end_datetime)

    starred_query = _build_email_query(email_addrs=no_addresses,
                                       qs=no_terms,
                                       date_bounds=date_range,
                                       starred=True)
    tangelo.log("email.exportStarred(query: %s)" % starred_query)

    hits = _query_emails(data_set_id, size, starred_query)["hits"]
    return export_emails_archive(data_set_id, [hit["num"] for hit in hits])
Beispiel #22
0
def _cluster_carrot2(index, type, email_addrs=None, query_terms='', topic_score=None, entity=None, date_bounds=None, cluster_fields=None, cluster_title_fields=None, algorithm="lingo", max_doc_pool_size=500):
    """POST a clustering search to ``/<index>/<type>/_search_with_clusters``.

    Args:
        index: index name used in the request path.
        type: document type used in the request path (name kept for
            caller compatibility although it shadows the builtin).
        email_addrs: address filter for ``_build_email_query``; ``None``
            means an empty list.
        query_terms: query string; also sent as the ``query_hint``.
        topic_score: accepted but not used by this function.
        entity: entity filter for ``_build_email_query``; ``None`` means an
            empty dict.
        date_bounds: date bounds forwarded to ``_build_email_query``.
        cluster_fields: fields mapped to ``content``; defaults to
            ``["_source.body"]``.
        cluster_title_fields: fields mapped to ``title``; defaults to
            ``["_source.subject"]``.
        algorithm: value of the request's ``algorithm`` field.
        max_doc_pool_size: ``size`` of the embedded search request.

    Returns:
        The unmodified result of ``es().transport.perform_request``.
    """
    # Build mutable defaults per call rather than sharing one object
    # between all calls.
    email_addrs = [] if email_addrs is None else email_addrs
    entity = {} if entity is None else entity
    cluster_fields = ["_source.body"] if cluster_fields is None else cluster_fields
    cluster_title_fields = ["_source.subject"] if cluster_title_fields is None else cluster_title_fields

    query = _build_email_query(email_addrs=email_addrs, qs=query_terms, entity=entity, date_bounds=date_bounds)
    carrot_query = {
        "search_request": {
            "query": query["query"],
            "size": max_doc_pool_size
        },
        "algorithm": algorithm,
        "max_hits": 0,
        "query_hint": query_terms,
        "field_mapping": {
            "title": cluster_title_fields,
            "content": cluster_fields
        }
    }

    # The response is returned untouched; the unused document-total
    # computation that used to follow the request has been removed.
    return es().transport.perform_request("POST", "/{}/{}/_search_with_clusters".format(index, type), {}, body=carrot_query)
Beispiel #23
0
def es_get_conversation(data_set_id, sender, recipients, start_datetime,
                        end_datetime, size, document_uid, current_datetime):
    """Return the conversation graph around one email, split at ``current_datetime``.

    Two email queries are run with ``address_filter_mode="conversation"``:
    an "after" query over ``(current_datetime, end_datetime)`` sorted
    ascending with ``date_mode_inclusive=True``, and a "before" query over
    ``(start_datetime, current_datetime)`` sorted descending with
    ``date_mode_inclusive=False``.  The before-hits are reversed and placed
    ahead of the after-hits.  The same pair of queries is repeated with
    ``attachments_only=True``.

    Returns:
        The graph from ``_build_graph_for_emails`` with three extra entries:
        ``"current_index"`` (number of before-emails, i.e. the position of
        the first after-email), ``"attachments"`` (before + after
        attachments) and ``"attachments_index"`` (position of the first
        attachment whose first element equals ``document_uid``, or ``-1``).
    """
    tangelo.log("es_search.es_get_conversation(senders=%s, recipients=%s)" %
                (str(sender), str(recipients)))
    #start_datetime = default_min_timeline_bound()

    # apply query with address intersection behavior
    # Sort order fixed from the misspelled 'acs' to 'asc', matching the
    # attachment query for the same date range further down.
    query = _build_email_query(sender_addrs=[sender],
                               recipient_addrs=recipients,
                               qs='',
                               date_bounds=(current_datetime, end_datetime),
                               sort_order='asc',
                               date_mode_inclusive=True,
                               address_filter_mode="conversation")
    tangelo.log("es_search.es_get_conversation(query-after: %s)" % (query))
    emails_asc = _query_emails(data_set_id, size, query)

    query = _build_email_query(sender_addrs=[sender],
                               recipient_addrs=recipients,
                               qs='',
                               date_bounds=(start_datetime, current_datetime),
                               sort_order='desc',
                               date_mode_inclusive=False,
                               address_filter_mode="conversation")
    tangelo.log("es_search.es_get_conversation(query-before: %s)" % (query))
    emails_desc = _query_emails(data_set_id, size, query)
    total = emails_asc["total"] + emails_desc["total"]

    # Before-emails arrive newest first; flip them so the merged list runs
    # oldest to newest.
    emails_desc = emails_desc['hits']
    emails_desc.reverse()
    current_index = len(emails_desc)
    emails = emails_desc + emails_asc['hits']

    graph = _build_graph_for_emails(data_set_id, emails, total)
    graph['current_index'] = current_index

    # Attachments from the current email onwards.
    query = _build_email_query(sender_addrs=[sender],
                               recipient_addrs=recipients,
                               qs='',
                               date_bounds=(current_datetime, end_datetime),
                               sort_order='asc',
                               date_mode_inclusive=True,
                               address_filter_mode="conversation",
                               attachments_only=True)
    tangelo.log("es_search.es_get_conversation(attachment-query-after: %s)" %
                (query))
    attachments_asc = _query_email_attachments(data_set_id, size, query)

    # Attachments before the current email; the log label used to say
    # "after" for this query as well.
    query = _build_email_query(sender_addrs=[sender],
                               recipient_addrs=recipients,
                               qs='',
                               date_bounds=(start_datetime, current_datetime),
                               sort_order='desc',
                               date_mode_inclusive=False,
                               address_filter_mode="conversation",
                               attachments_only=True)
    tangelo.log("es_search.es_get_conversation(attachment-query-before: %s)" %
                (query))
    attachments_desc = _query_email_attachments(data_set_id, size, query)
    attachments_desc.reverse()

    # Find the first index in the attachment array where the current emails attachments start or -1
    graph["attachments"] = attachments_desc + attachments_asc
    graph["attachments_index"] = next(
        (position
         for position, attach in enumerate(graph["attachments"])
         if attach[0] == document_uid),
        -1)

    return graph