Esempio n. 1
0
 def getTopics(_id):
     """Run ``topic_stmt`` for ``_id`` and return one name/score dict per row.

     Column 0 of each row is passed through ``formatName`` and column 1
     through ``formatScore``.
     """
     with newman_connector() as cnx:
         with execute_query(cnx.conn(), topic_stmt, _id) as qry:
             topics = []
             for row in qry.cursor():
                 name = formatName(nth(row, 0))
                 score = formatScore(nth(row, 1))
                 topics.append({'name': name, 'score': score})
             return topics
Esempio n. 2
0
def email_scores(*args):
    """Stub score lookup: echo the email id and category with an empty score list.

    ``args[0]`` is the URL-quoted email id and ``args[1]`` the category
    (``'all'`` is the fallback handed to ``nth``). An empty email id produces
    a ``tangelo.HTTPStatusCode`` 400 instead of a result dict.
    """
    raw_email_id = nth(args, 0, '')
    email_id = unquote(raw_email_id)
    category = nth(args, 1, 'all')

    if email_id:
        return {"scores": [], "email": email_id, "category": category}

    return tangelo.HTTPStatusCode(400,
                                  "invalid service call - missing email")
Esempio n. 3
0
def processLine(line):
    """Split a tab-separated record and return four columns plus a cleaned body.

    The body is column 15 (``''`` is the fallback handed to ``nth``). Non-ASCII
    characters are replaced by spaces, ``[:newline:]`` markers become spaces,
    and square brackets and ``mailto:`` prefixes are removed. The result is
    the tuple (column 0, column 1, column 10, column 14, body).
    """
    fields = line.split('\t')
    text = re.sub(r'[^\x00-\x7F]', ' ', nth(fields, 15, ''))

    # Applied in this exact order: the newline marker must go before the
    # bare brackets are stripped.
    substitutions = (
        ('[:newline:]', ' '),
        ('[', ''),
        (']', ''),
        ('mailto:', ''),
    )
    for old, new in substitutions:
        text = text.replace(old, new)

    return (nth(fields, 0), nth(fields, 1), nth(fields, 10), nth(fields, 14),
            text)
Esempio n. 4
0
def search(*path_args, **param_args):
    """Route a search request by the field named in the first path segment.

    ``path_args`` is expected to be ``(field, value, ...)``:

    * ``text`` / ``all`` -- run ``get_top_email_by_text_query`` on the
      URL-unquoted value.
    * ``email`` -- run ``es_get_all_email_by_address`` on the URL-unquoted
      address, also passing the text query parsed from ``param_args``.
    * ``topic`` / ``community`` -- not implemented yet.

    Any other field, a field without a value, or no path segments at all
    returns an empty graph/rows result. The no-segment case used to raise
    ``IndexError`` on ``path_args[0]``.
    """
    tangelo.content_type("application/json")
    tangelo.log("search.search(path_args[%s] %s)" %
                (len(path_args), str(path_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)

    # TODO this needs to come from UI
    size = size if size > 500 else 2500

    # TODO make sure that the qs param is put on the query
    qs = parseParamTextQuery(**param_args)

    empty_result = {"graph": {"nodes": [], "links": []}, "rows": []}

    # Without both a field and a value there is nothing to search for.
    if len(path_args) < 2:
        return empty_result

    field = path_args[0]

    if field in ("text", "all"):
        # TODO remove hacky path_args - should come from params
        qs = urllib.unquote(nth(path_args, 1, ''))
        return get_top_email_by_text_query(data_set_id, qs, start_datetime,
                                           end_datetime, size)

    if field == "email":
        # TODO remove hacky path_args - should come from params
        email_address = urllib.unquote(nth(path_args, 1, ''))
        return es_get_all_email_by_address(data_set_id, email_address, qs,
                                           start_datetime, end_datetime,
                                           size)

    # TODO implement search by topic and by community; both currently fall
    # through to the empty result together with unknown fields.
    return empty_result
Esempio n. 5
0
def topic_list(*args, **kwargs):
    """Return ``get_categories`` for the data set named in the request params.

    The first positional argument (category, ``'all'`` as the ``nth``
    fallback) is read but not used by the lookup.
    """
    category = nth(args, 0, 'all')  # read but currently unused
    # Only the data set id is needed; the other parsed values are ignored.
    data_set_id, _start, _end, _size = parseParamDatetime(**kwargs)
    tangelo.content_type("application/json")
    categories = get_categories(data_set_id)
    return categories
Esempio n. 6
0
def getEmail(*args):
    """Return the email record and its entities for the id in ``args[0]``.

    The id is URL-unquoted first; an empty id produces a
    ``tangelo.HTTPStatusCode`` 400.
    """
    email = urllib.unquote(nth(args, 0, ''))
    if email:
        tangelo.content_type("application/json")
        email_record = queryEmail(email)
        entities = queryEntity(email)
        return {"email": email_record, "entities": entities}

    return tangelo.HTTPStatusCode(400, "invalid service call - missing id")
Esempio n. 7
0
def email_scores(*args):
    """Return the topic scores stored for an email within a category.

    ``args[0]`` is the URL-quoted email id and ``args[1]`` the category
    (``'all'`` is the fallback handed to ``nth``). Scores are read from
    ``xref_email_topic_score`` ordered by ``idx``. An empty email id produces
    a ``tangelo.HTTPStatusCode`` 400.
    """
    email_id = unquote(nth(args, 0, ''))
    category = nth(args, 1, 'all')
    if not email_id:
        return tangelo.HTTPStatusCode(400,
                                      "invalid service call - missing email")

    stmt = (" select score from xref_email_topic_score "
            " where category_id = %s and email_id = %s "
            " order by idx ")

    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt, category, email_id) as qry:
            scores = []
            for row in qry.cursor():
                # head() picks the single selected column out of the row.
                scores.append(head(row))
            tangelo.content_type("application/json")
            return {"scores": scores, "email": email_id, "category": category}
Esempio n. 8
0
def search(*args):
    """Log the incoming path arguments and delegate to ``createResults``.

    The first argument selects the search fields (``'all'`` is the fallback
    handed to ``nth``); ``rest(args)`` is forwarded as the search arguments.
    """
    cherrypy.log("args: %s" % str(args))
    cherrypy.log("args-len: %s" % len(args))

    fields, args_array = nth(args, 0, 'all'), rest(args)
    cherrypy.log("search fields: %s, args: %s" % (fields, args_array))

    return createResults(fields, args_array)
Esempio n. 9
0
def getAttachFileType(*args, **kwargs):
    """Return the top attachment file types for the requested data set.

    The ``size`` parsed from the request params is used both as the
    ``num_top_attachments`` argument and as the slice length of the result.
    The attachment type and the email address list are parsed but do not
    affect the lookup yet.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachFileType(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    top_count = int(size)

    # hack for now: an empty attachment type means 'all'
    attach_type = urllib.unquote(nth(args, 0, '')) or 'all'

    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO: implement populating the attachment file-types under individual
    # email-accounts; until then the same data-set-wide lookup is used whether
    # or not email addresses were supplied.
    date_bounds = (start_datetime, end_datetime)
    top_types = get_top_attachment_types(data_set_id,
                                         date_bounds=date_bounds,
                                         num_top_attachments=top_count)
    file_types = top_types[:top_count]

    return {
        "account_id": data_set_id,
        "data_set_id": data_set_id,
        "account_start_datetime": start_datetime,
        "account_end_datetime": end_datetime,
        "types": file_types,
    }
Esempio n. 10
0
def getEmail(*args):
    """Fetch an email and its entities by the URL-quoted id in ``args[0]``.

    Returns a ``tangelo.HTTPStatusCode`` 400 when the id is empty.
    """
    quoted_id = nth(args, 0, '')
    email = urllib.unquote(quoted_id)
    if email:
        tangelo.content_type("application/json")
        payload = {}
        payload["email"] = queryEmail(email)
        payload["entities"] = queryEntity(email)
        return payload

    return tangelo.HTTPStatusCode(400, "invalid service call - missing id")
Esempio n. 11
0
def search_email_by_community(*args, **param_args):
    """Return the emails of a community via ``es_get_all_email_by_community``.

    ``args[0]`` names the community. The data set id and date range come from
    ``parseParamDatetime``; email addresses and the text query are parsed from
    ``param_args``. A missing data set id or community produces a
    ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.content_type("application/json")
    tangelo.log("search_email_by_community(args: %s kwargs: %s)" %
                (str(args), str(param_args)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **param_args)
    community = nth(args, 0, '')

    # TODO: set from UI
    # NOTE: this deliberately replaces the size parsed above with the raw
    # request parameter (default 2500), as the original code did.
    size = param_args.get('size', 2500)

    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")
    if not community:
        # The message previously said "missing sender", a copy-paste slip
        # that misreported which argument was absent.
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing community")

    email_addrs = parseParam_email_addr(**param_args)

    qs = parseParamTextQuery(**param_args)

    return es_get_all_email_by_community(data_set_id, community, email_addrs,
                                         qs, start_datetime, end_datetime,
                                         size)
Esempio n. 12
0
def email_scores(*args):
    """Look up an email's topic scores for one category.

    ``args[0]`` is the URL-quoted email id, ``args[1]`` the category
    (``'all'`` is the fallback handed to ``nth``). Rows come from
    ``xref_email_topic_score`` ordered by ``idx``; ``head`` is applied to each
    row. An empty email id produces a ``tangelo.HTTPStatusCode`` 400.
    """
    email_id = unquote(nth(args, 0, ''))
    category = nth(args, 1, 'all')
    if not email_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing email")

    stmt = (
        " select score from xref_email_topic_score "
        " where category_id = %s and email_id = %s "
        " order by idx "
    )

    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt, category, email_id) as qry:
            score_list = []
            for record in qry.cursor():
                score_list.append(head(record))
            tangelo.content_type("application/json")
            return {"scores": score_list, "email": email_id, "category": category}
Esempio n. 13
0
def setSelectedDataSet(*args):
    """Select a data set: warm its address cache and return its index record.

    ``args[0]`` is the URL-quoted data set id; an empty id produces a
    ``tangelo.HTTPStatusCode`` 400.
    """
    # Without this declaration the assignment below only created a local that
    # was discarded on return.
    # NOTE(review): assumes a module-level ``_current_data_set_selected`` is
    # what other code reads -- confirm against the rest of the module.
    global _current_data_set_selected

    tangelo.content_type("application/json")
    data_set_id = urllib.unquote(nth(args, 0, ''))
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing data_set_id")

    # The cache initializer's return value was never used, so it is not bound.
    initialize_email_addr_cache(data_set_id)
    _current_data_set_selected = data_set_id
    return _index_record(data_set_id)
Esempio n. 14
0
def getRollup(*args):
    """Return the first row of ``stmt_entity_rollup_id`` for an entity.

    ``args[0]`` is the URL-quoted entity id; an empty id produces a
    ``tangelo.HTTPStatusCode`` 400. The row from ``fetchone()`` is returned
    as-is under ``rollupId``.
    """
    quoted_entity = nth(args, 0, "")
    entity = urllib.unquote(quoted_entity)
    if not entity:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    with newman_connector() as read_cnx:
        connection = read_cnx.conn()
        with execute_query(connection, stmt_entity_rollup_id, entity) as qry:
            cursor = qry.cursor()
            rollup_row = cursor.fetchone()
            tangelo.content_type("application/json")
            return {"rollupId": rollup_row}
Esempio n. 15
0
def topic_list(*args):
    """Return all ``topic_category`` rows for a category, ordered by ``idx``.

    ``args[0]`` is the category id (``'all'`` is the fallback handed to
    ``nth``). Each row carries the ``idx``, ``value`` and ``docs`` columns.
    """
    category = nth(args, 0, 'all')
    stmt = (" select idx, value, docs from topic_category "
            " where category_id = %s "
            " order by idx ")
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt, category) as qry:
            categories = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"categories": categories}
Esempio n. 16
0
def setSelectedDataSet(*args):
    """Select a data set: warm its address cache and return its index record.

    ``args[0]`` is the URL-quoted data set id; an empty id produces a
    ``tangelo.HTTPStatusCode`` 400.
    """
    # Declared global so the assignment below is visible after the call;
    # without it the name was a throwaway local.
    # NOTE(review): assumes a module-level ``_current_data_set_selected`` is
    # what other code reads -- confirm against the rest of the module.
    global _current_data_set_selected

    tangelo.content_type("application/json")
    data_set_id = urllib.unquote(nth(args, 0, ''))
    if not data_set_id:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing data_set_id")

    # The cache initializer's return value was never used, so it is not bound.
    initialize_email_addr_cache(data_set_id)
    _current_data_set_selected = data_set_id
    return _index_record(data_set_id)
Esempio n. 17
0
def getAttachCount(*args, **kwargs):
    """Return weekly attachment activity for the data set or for given senders.

    With no email addresses in the request, one entry is produced for the
    whole data set via ``get_total_attachment_activity``. Otherwise one entry
    per address is produced via ``get_emailer_attachment_activity``. An empty
    ``args[0]`` (attachment type) produces a ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachCount(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing attach_type")

    attach_type = 'all'  #hack for now
    email_address_list = parseParamEmailAddress(**kwargs)

    def _entry(account_id, activities):
        # Shared shape of one element of "account_activity_list".
        return {
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activities,
        }

    if email_address_list:
        entries = []
        for account_id in email_address_list:
            activities = get_emailer_attachment_activity(
                data_set_id,
                account_id, (start_datetime, end_datetime),
                interval="week")
            entries.append(_entry(account_id, activities))
    else:
        activity = get_total_attachment_activity(
            data_set_id,
            data_set_id,
            query_function=attachment_histogram,
            sender_email_addr="",
            start=start_datetime,
            end=end_datetime,
            interval="week")
        entries = [_entry(data_set_id, activity)]

    return {"account_activity_list": entries}
Esempio n. 18
0
def getRollup(*args):
    """Look up the rollup row for the URL-quoted entity id in ``args[0]``.

    The result of ``fetchone()`` on ``stmt_entity_rollup_id`` is returned
    unchanged under ``rollupId``. An empty id produces a
    ``tangelo.HTTPStatusCode`` 400.
    """
    entity = urllib.unquote(nth(args, 0, ''))
    if not entity:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    with newman_connector() as read_cnx:
        query = execute_query(read_cnx.conn(), stmt_entity_rollup_id, entity)
        with query as qry:
            first_row = qry.cursor().fetchone()
            tangelo.content_type("application/json")
            return {"rollupId": first_row}
Esempio n. 19
0
def topic_list(*args):
    """List the ``topic_category`` rows of one category in ``idx`` order.

    ``args[0]`` is the category id (``'all'`` is the fallback handed to
    ``nth``). The selected columns are ``idx``, ``value`` and ``docs``.
    """
    category = nth(args, 0, 'all')
    stmt = (
        " select idx, value, docs from topic_category "
        " where category_id = %s "
        " order by idx "
    )
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt, category) as qry:
            rows = []
            for row in qry.cursor():
                rows.append(row)
            tangelo.content_type("application/json")
            return {"categories": rows}
Esempio n. 20
0
def seedSearch(*args):
    """Seed the daemon at an email's line and return the email it yields next.

    ``args[0]`` is the URL-quoted email id. Its line number (``findLineNum``)
    is posted to ``<daemon_url>/firstemail/<line>``, then
    ``<daemon_url>/getNextEmail`` is requested and its response is mapped back
    through ``findEmailId``. A missing id or an empty daemon response produces
    a ``tangelo.HTTPStatusCode`` 400.
    """
    # Pass '' as the fallback so a missing argument reaches the 400 below.
    # Without it ``nth`` presumably yields None, which ``urllib.unquote``
    # cannot handle.
    email_id = urllib.unquote(nth(args, 0, ''))
    if not email_id:
        return tangelo.HTTPStatusCode(400, "missing argument email_id")

    line_num = findLineNum(email_id)
    seed_url = "{0}/firstemail/{1}".format(daemon_url, line_num)
    request(seed_url)

    next_url = "{0}/getNextEmail".format(daemon_url)
    start_point = request(next_url)
    if not start_point:
        return tangelo.HTTPStatusCode(400, "failed to set starting email")
    return findEmailId(start_point)
Esempio n. 21
0
def getAccountActivity(*args, **kwargs):
    """Return weekly email activity for the data set or for given accounts.

    With no email addresses in the request, a single entry for the whole data
    set is returned. Otherwise there is one entry per address. An empty
    ``args[0]`` (account type) produces a ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAccountActivity(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    account_type = urllib.unquote(nth(args, 0, ''))
    if not account_type:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - missing account_type")

    email_address_list = parseParamEmailAddress(**kwargs)
    date_bounds = (start_datetime, end_datetime)

    def _entry(account_id, activities):
        # Shared shape of one element of "account_activity_list".
        return {
            "account_id": account_id,
            "data_set_id": data_set_id,
            "account_start_datetime": start_datetime,
            "account_end_datetime": end_datetime,
            "activities": activities,
        }

    if email_address_list:
        entries = []
        for account_id in email_address_list:
            activities = get_email_activity(data_set_id,
                                            data_set_id,
                                            account_id,
                                            date_bounds=date_bounds,
                                            interval="week")
            entries.append(_entry(account_id, activities))
    else:
        activities = get_email_activity(data_set_id,
                                        data_set_id,
                                        date_bounds=date_bounds,
                                        interval="week")
        entries = [_entry(data_set_id, activities)]

    return {"account_activity_list": entries}
Esempio n. 22
0
def getTopRollup(*args):
    """Return the top rollup entities, limited to the count in ``args[0]``.

    The count is URL-unquoted and must parse as a non-negative integer;
    otherwise a ``tangelo.HTTPStatusCode`` 400 is returned. The rows of
    ``stmt_top_rollup_entities`` with that limit are returned under
    ``entities``.
    """
    amt = urllib.unquote(nth(args, 0, ""))
    if not amt:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    # ``amt`` comes straight from the request URL and is spliced into the SQL
    # text, so it is forced to an integer first to close the injection hole.
    try:
        limit = int(amt)
    except ValueError:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - amount must be an integer")
    if limit < 0:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - amount must not be negative")

    stmt = stmt_top_rollup_entities + ("limit {0}".format(limit))
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            rtn = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"entities": rtn}
Esempio n. 23
0
def getAllAttachmentBySender(*args, **kwargs):
    """Return ``get_attachments_by_sender`` for the sender in ``args[0]``.

    The data set id, date range and size come from ``parseParamDatetime``.
    A missing data set id or sender produces a ``tangelo.HTTPStatusCode`` 400,
    with the data set id checked first.
    """
    tangelo.log("getAttachmentsSender(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    sender = nth(args, 0, '')

    required = (
        (data_set_id, "invalid service call - missing data_set_id"),
        (sender, "invalid service call - missing sender"),
    )
    for value, message in required:
        if not value:
            return tangelo.HTTPStatusCode(400, message)

    tangelo.content_type("application/json")

    attachments = get_attachments_by_sender(data_set_id, sender, start_datetime, end_datetime, size)
    return attachments
Esempio n. 24
0
def getTopRollup(*args):
    """Return the top rollup entities, limited to the count in ``args[0]``.

    The count is URL-unquoted and must parse as a non-negative integer;
    otherwise a ``tangelo.HTTPStatusCode`` 400 is returned. The rows of
    ``stmt_top_rollup_entities`` with that limit are returned under
    ``entities``.
    """
    amt = urllib.unquote(nth(args, 0, ''))
    if not amt:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    # The limit is concatenated into the SQL text, and ``amt`` is
    # caller-controlled, so only a validated integer may reach the statement.
    try:
        limit = int(amt)
    except ValueError:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - amount must be an integer")
    if limit < 0:
        return tangelo.HTTPStatusCode(
            400, "invalid service call - amount must not be negative")

    stmt = stmt_top_rollup_entities + ("limit {0}".format(limit))
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt) as qry:
            rtn = list(qry.cursor())
            tangelo.content_type("application/json")
            return {"entities": rtn}
Esempio n. 25
0
def seedSearch(*args):
    """Seed the daemon at an email's line and return the email it yields next.

    ``args[0]`` is the URL-quoted email id. Its line number (``findLineNum``)
    is sent to ``<daemon_url>/firstemail/<line>``; the response of
    ``<daemon_url>/getNextEmail`` is then mapped back through ``findEmailId``.
    A missing id or an empty daemon response produces a
    ``tangelo.HTTPStatusCode`` 400.
    """
    # '' is supplied as the fallback so that a call without arguments is
    # answered with the 400 below. Without it ``nth`` presumably yields None,
    # which ``urllib.unquote`` cannot handle.
    email_id = urllib.unquote(nth(args, 0, ''))
    if not email_id:
        return tangelo.HTTPStatusCode(400, "missing argument email_id")

    line_num = findLineNum(email_id)
    request("{0}/firstemail/{1}".format(daemon_url, line_num))

    start_point = request("{0}/getNextEmail".format(daemon_url))
    if not start_point:
        return tangelo.HTTPStatusCode(400, "failed to set starting email")
    return findEmailId(start_point)
Esempio n. 26
0
def getAttachmentsSender(*args):
    """Return the emails with attachments sent by the address in ``args[0]``.

    The address is URL-unquoted; an empty one produces a
    ``tangelo.HTTPStatusCode`` 400. Each row is returned as a list where
    string values are UTF-8 encoded and every other value is passed through
    ``str``.
    """
    sender = urllib.unquote(nth(args, 0, ''))
    if not sender:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    def _as_text(val):
        # Python 2: basestring covers both str and unicode.
        if isinstance(val, basestring):
            return val.encode('utf-8')
        return str(val)

    tangelo.content_type("application/json")
    stmt = (
        " select id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize "
        " from email "
        " where from_addr = %s and attach != '' "
    )
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt, sender) as qry:
            attachments = []
            for row in qry.cursor():
                attachments.append([_as_text(val) for val in row])
            return {"sender": sender, "email_attachments": attachments}
Esempio n. 27
0
def getAllAttachmentBySender(*args, **kwargs):
    """Fetch every attachment sent by ``args[0]`` within the requested range.

    The data set id, date range and size are parsed from ``kwargs``. The data
    set id and then the sender are validated; either one missing produces a
    ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.log("getAttachmentsSender(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    sender = nth(args, 0, '')

    error = None
    if not data_set_id:
        error = "invalid service call - missing data_set_id"
    elif not sender:
        error = "invalid service call - missing sender"
    if error is not None:
        return tangelo.HTTPStatusCode(400, error)

    tangelo.content_type("application/json")

    return get_attachments_by_sender(data_set_id, sender, start_datetime,
                                     end_datetime, size)
Esempio n. 28
0
def getAttachmentsSender(*args):
    """List emails carrying attachments from the sender given in ``args[0]``.

    The sender is URL-unquoted; an empty one produces a
    ``tangelo.HTTPStatusCode`` 400. Row values that are strings are UTF-8
    encoded and all other values are converted with ``str``.
    """
    sender = urllib.unquote(nth(args, 0, ''))
    if not sender:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing id")

    tangelo.content_type("application/json")
    stmt = (
        " select id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize "
        " from email "
        " where from_addr = %s and attach != '' ")
    with newman_connector() as read_cnx:
        with execute_query(read_cnx.conn(), stmt, sender) as qry:
            rows = []
            for row in qry.cursor():
                converted = []
                for val in row:
                    # Python 2: basestring covers both str and unicode.
                    if isinstance(val, basestring):
                        converted.append(val.encode('utf-8'))
                    else:
                        converted.append(str(val))
                rows.append(converted)
            return {"sender": sender, "email_attachments": rows}
Esempio n. 29
0
def getAttachCount(*args, **kwargs):
    """Report weekly attachment activity, per sender when addresses are given.

    Without email addresses the whole data set is summarized by
    ``get_total_attachment_activity``. With addresses, each one gets its own
    entry from ``get_emailer_attachment_activity``. An empty ``args[0]``
    (attachment type) produces a ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachCount(args: %s kwargs: %s)" % (str(args), str(kwargs)))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attach_type")

    attach_type = 'all' #hack for now
    email_address_list = parseParamEmailAddress(**kwargs)

    # Fields that are identical in every entry of the listing.
    common = {
        "data_set_id": data_set_id,
        "account_start_datetime": start_datetime,
        "account_end_datetime": end_datetime,
    }

    account_activity_list = []
    if not email_address_list:
        activity = get_total_attachment_activity(data_set_id, data_set_id, query_function=attachment_histogram, sender_email_addr="", start=start_datetime, end=end_datetime, interval="week")
        entry = dict(common)
        entry["account_id"] = data_set_id
        entry["activities"] = activity
        account_activity_list.append(entry)
    else:
        for account_id in email_address_list:
            entry = dict(common)
            entry["account_id"] = account_id
            entry["activities"] = get_emailer_attachment_activity(data_set_id, account_id, (start_datetime, end_datetime), interval="week")
            account_activity_list.append(entry)

    return {"account_activity_list": account_activity_list}
Esempio n. 30
0
def get_attachment_by_id(*args, **kwargs):
    """Serve one attachment's decoded content, looked up by id in the index.

    The attachment id is taken from ``args[0]`` or, when that is empty, from
    ``parseParamAttachmentGUID(**kwargs)``. The document is fetched from the
    ``attachments`` doc type of the data set's index and its ``contents64``
    field is base64-decoded. When a MIME type can be guessed from the filename
    the content is served inline with that type; otherwise it is served as an
    ``application/x-download`` attachment.

    Returns the decoded content as a string, or a ``tangelo.HTTPStatusCode``
    400 when the index, the id or the document is missing.
    """
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    attachment_id = nth(args, 0, '')

    if not attachment_id:
        attachment_id = parseParamAttachmentGUID(**kwargs)

    # The log label previously named a different function
    # (get_attachments_sender), which made log lines misleading.
    cherrypy.log("email.get_attachment_by_id(index=%s, attachment_id=%s)" % (data_set_id, attachment_id))
    if not data_set_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing index")
    if not attachment_id:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing attachment_id")

    attachment = es().get(index=data_set_id, doc_type="attachments", id=attachment_id)

    if not attachment:
        return tangelo.HTTPStatusCode(400, "no attachments found for (index=%s, attachment_id=%s)" % (data_set_id, attachment_id))

    attachment = attachment["_source"]
    filename = attachment["filename"]

    mime_type = mimetypes.guess_type(filename)[0]

    if not mime_type:
        tangelo.content_type("application/x-download")
        header("Content-Disposition", 'attachment; filename="{}"'.format(filename))
    else:
        tangelo.content_type(mime_type)
        header("Content-Disposition", 'inline; filename="{}"'.format(filename))

    # Named ``payload`` rather than ``bytes`` to avoid shadowing the builtin.
    payload = base64.b64decode(attachment["contents64"])

    as_str = str(payload)
    # The original call passed the length as the first argument and the label
    # as the second, which put the label where the message belongs; log one
    # formatted message instead.
    tangelo.log("Uploading Attachment - length = %s" % len(as_str))

    return as_str
Esempio n. 31
0
def get_top_entities(*args, **kwargs):
    """Return the top entities of the data set as indexed rows.

    ``args[0]`` is the URL-quoted number of entities to return (``"20"`` is
    the fallback handed to ``nth``). When email addresses are present in the
    request they are passed to ``get_entity_histogram`` as well. Each result
    row is ``[str(index), type, key, doc_count]``.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    top_count = int(urllib.unquote(nth(args, 0, "20")))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)

    date_bounds = (start_datetime, end_datetime)

    # TODO qs not being evaluated in inner filter called by this method
    if email_address_list:
        histogram = get_entity_histogram(data_set_id,
                                         "emails",
                                         email_address_list,
                                         qs=qs,
                                         date_bounds=date_bounds)
    else:
        histogram = get_entity_histogram(data_set_id,
                                         "emails",
                                         qs=qs,
                                         date_bounds=date_bounds)

    rows = []
    for i, entity in enumerate(histogram[:top_count]):
        rows.append(
            [str(i), entity["type"], entity["key"], entity["doc_count"]])

    return {"entities": rows}
Esempio n. 32
0
def get_top_entities(*args, **kwargs):
    """List the highest-ranked entities, optionally scoped to email addresses.

    ``args[0]`` is the URL-quoted row count (``"20"`` is the fallback handed
    to ``nth``). The histogram from ``get_entity_histogram`` is cut to that
    many items and each item becomes ``[str(index), type, key, doc_count]``.
    """
    tangelo.content_type("application/json")
    tangelo.log("entity.get_top_entities(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    top_count = int(urllib.unquote(nth(args, 0, "20")))

    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)
    email_address_list = parseParamEmailAddress(**kwargs)

    # TODO set from UI
    qs = parseParamTextQuery(**kwargs)

    # The address list is passed positionally, and only when it is non-empty.
    positional = [data_set_id, "emails"]
    if email_address_list:
        positional.append(email_address_list)

    # TODO qs not being evaluated in inner filter called by this method
    histogram = get_entity_histogram(*positional, qs=qs, date_bounds=(start_datetime, end_datetime))
    entities = histogram[:top_count]

    def _row(index, entity):
        return [str(index), entity["type"], entity["key"], entity["doc_count"]]

    return {"entities": [_row(i, entity) for i, entity in enumerate(entities)]}
Esempio n. 33
0
def getAccountActivity(*args, **kwargs):
    """Report weekly email activity, per account when addresses are given.

    Without email addresses in the request a single data-set-wide entry is
    returned. With addresses, each one gets its own entry. An empty
    ``args[0]`` (account type) produces a ``tangelo.HTTPStatusCode`` 400.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAccountActivity(args: %s kwargs: %s)" % (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(**kwargs)

    account_type = urllib.unquote(nth(args, 0, ''))
    if not account_type:
        return tangelo.HTTPStatusCode(400, "invalid service call - missing account_type")

    email_address_list = parseParamEmailAddress(**kwargs)

    # Fields that are identical in every entry of the listing.
    common = {
        "data_set_id": data_set_id,
        "account_start_datetime": start_datetime,
        "account_end_datetime": end_datetime,
    }

    account_activity_list = []
    if not email_address_list:
        entry = dict(common)
        entry["account_id"] = data_set_id
        entry["activities"] = get_email_activity(data_set_id, data_set_id, date_bounds=(start_datetime, end_datetime), interval="week")
        account_activity_list.append(entry)
    else:
        for account_id in email_address_list:
            entry = dict(common)
            entry["account_id"] = account_id
            entry["activities"] = get_email_activity(data_set_id, data_set_id, account_id, date_bounds=(start_datetime, end_datetime), interval="week")
            account_activity_list.append(entry)

    return {"account_activity_list": account_activity_list}
Esempio n. 34
0
def getAttachFileType(*args, **kwargs):
    """List the most frequent attachment file types of the data set.

    The parsed ``size`` is used as both the ``num_top_attachments`` argument
    and the slice length. The attachment type and email address list are
    parsed but do not change the lookup yet.
    """
    tangelo.content_type("application/json")
    tangelo.log("getAttachFileType(args: %s kwargs: %s)" %
                (str(args), str(kwargs)))
    data_set_id, start_datetime, end_datetime, size = parseParamDatetime(
        **kwargs)

    top_count = int(size)

    attach_type = urllib.unquote(nth(args, 0, ''))
    if not attach_type:
        attach_type = 'all'  #hack for now

    email_address_list = parseParamEmailAddress(**kwargs)

    #TODO: implement populating the attachment file-types under individual
    # email-accounts; for now the data-set-wide lookup is used regardless of
    # whether email addresses were supplied.
    top_types = get_top_attachment_types(
        data_set_id,
        date_bounds=(start_datetime, end_datetime),
        num_top_attachments=top_count)

    result = {}
    result["account_id"] = data_set_id
    result["data_set_id"] = data_set_id
    result["account_start_datetime"] = start_datetime
    result["account_end_datetime"] = end_datetime
    result["types"] = top_types[:top_count]
    return result
Esempio n. 35
0
def isRight(either):
    """Return element 1 of ``either`` as fetched by ``nth``.

    NOTE(review): despite the predicate-style name, the value is returned
    as-is and is not coerced to ``bool``.
    """
    right_value = nth(either, 1)
    return right_value
Esempio n. 36
0
def right(either):
    """Return the right-hand value of ``either``: element 1 as fetched by ``nth``."""
    value = nth(either, 1)
    return value
Esempio n. 37
0
def processLine(line):
    """Parse one tab-separated line into (col 0, col 1, col 10, col 14, body).

    The body is column 15 (``''`` is the fallback handed to ``nth``) with
    non-ASCII characters replaced by spaces, ``[:newline:]`` markers turned
    into spaces, and square brackets and ``mailto:`` removed.
    """
    columns = line.split('\t')
    raw_body = nth(columns, 15, '')
    ascii_body = re.sub(r'[^\x00-\x7F]', ' ', raw_body)

    # Order matters: the newline marker is handled before bare brackets.
    body = ascii_body.replace('[:newline:]', ' ')
    body = body.replace('[', '')
    body = body.replace(']', '')
    body = body.replace('mailto:', '')

    selected = [nth(columns, index) for index in (0, 1, 10, 14)]
    return tuple(selected) + (body,)
Esempio n. 38
0
def clean_string(sz, expr_list):
    """Apply every regex substitution in ``expr_list`` to ``sz``, in order.

    Each entry supplies the pattern at index 0 and the replacement at index 1
    (``' '`` is the fallback handed to ``nth``).
    """
    cleaned = sz
    for rule in expr_list:
        cleaned = re.sub(nth(rule, 0), nth(rule, 1, ' '), cleaned)
    return cleaned
Esempio n. 39
0
from newman.utils.file import slurpA
from newman.utils.functions import head,last,nth

if __name__ == "__main__":
    # Rank the recipients of one sender by message count and print the top 20
    # as "<score>:<address>", where the score is the count divided by the
    # highest count (so the first line is always 1.00).

    recipients = {}

    SourceEmail = sys.argv[1]

    lines = slurpA("tmp/exploded.csv")
    for line in lines:
        (dt, src, target) = line.strip().split('\t')

        # Only count mail sent by SourceEmail to somebody else.
        if src == SourceEmail and target != SourceEmail:
            recipients[target] = recipients.get(target, 0) + 1

    # Highest count first; ties are broken alphabetically by address.
    ranked = sorted(recipients.items(), key=lambda x: (-x[1], x[0]))[:20]

    # With no matching rows there is nothing to normalise against; the
    # original code failed here instead of printing nothing.
    if ranked:
        top = float(ranked[0][1])
        step = 1.0 / top

        for k, v in ranked:
            # Parenthesised so the line is valid under both Python 2 and 3.
            print("{0:.2f}:{1}".format((v * step), k))
Esempio n. 40
0
def getIngestLog(*args):
    """Return the contents of ``<work_dir>/<job_id>.tee.log`` under ``log``.

    ``args[0]`` is the job id; the file is read with ``slurp``.
    """
    logfile = "{}/{}.tee.log".format(work_dir, nth(args, 0))
    contents = slurp(logfile)
    tangelo.content_type("application/json")
    return {'log': contents}
Esempio n. 41
0
def left(either):
    """Return the left-hand value of ``either``: element 0 as fetched by ``nth``."""
    value = nth(either, 0)
    return value
Esempio n. 42
0
def isRight(either):
    """Return element 1 of ``either`` as fetched by ``nth``.

    NOTE(review): the value itself is returned, not a ``bool``; callers
    presumably rely on its truthiness -- confirm.
    """
    second = nth(either, 1)
    return second
Esempio n. 43
0
def right(either):
    """Fetch element 1 (the right-hand slot) of ``either`` through ``nth``."""
    right_value = nth(either, 1)
    return right_value
Esempio n. 44
0
from newman.utils.file import slurpA
from newman.utils.functions import head, last, nth

if __name__ == "__main__":
    # Count how often the sender given on the command line wrote to each other
    # address, then print the 20 most frequent recipients as
    # "<score>:<address>", with scores scaled so the top recipient is 1.00.

    recipients = {}

    SourceEmail = sys.argv[1]

    lines = slurpA("tmp/exploded.csv")
    for line in lines:
        (dt, src, target) = line.strip().split('\t')

        # Skip rows from other senders and mail the sender sent to themselves.
        if src != SourceEmail or target == SourceEmail:
            continue
        recipients[target] = recipients.get(target, 0) + 1

    # Highest count first; ties are broken alphabetically by address.
    ranked = sorted(recipients.items(), key=lambda x: (-x[1], x[0]))[:20]

    # An empty ranking has no top count to divide by; the original code
    # failed in that case, so nothing is printed instead.
    if ranked:
        top = float(ranked[0][1])
        step = 1.0 / top

        for k, v in ranked:
            # Parenthesised so the line is valid under both Python 2 and 3.
            print("{0:.2f}:{1}".format((v * step), k))
Esempio n. 45
0
def getState(*args):
    """Return the contents of ``<work_dir>/<email_addr>.log`` under ``log``.

    ``args[0]`` is the email address; the file is read with ``slurp``.
    """
    logfile = "{}/{}.log".format(work_dir, nth(args, 0))
    contents = slurp(logfile)
    tangelo.content_type("application/json")
    return {'log': contents}
Esempio n. 46
0
def isLeft(either):
    """Return True when element 0 of ``either`` (fetched by ``nth``) is falsy."""
    first = nth(either, 0)
    return not first
Esempio n. 47
0
def getIngestLog(*args):
    """Read a job's ``.tee.log`` file from ``work_dir`` and return it as JSON.

    ``args[0]`` is the job id; the file contents are returned under ``log``.
    """
    job_id = nth(args, 0)
    path_parts = (work_dir, job_id)
    log_text = slurp("{}/{}.tee.log".format(*path_parts))
    tangelo.content_type("application/json")
    return {'log': log_text}
Esempio n. 48
0
def left(either):
    """Fetch element 0 (the left-hand slot) of ``either`` through ``nth``."""
    left_value = nth(either, 0)
    return left_value
Esempio n. 49
0
 def getTopics(_id):
     """Return a list of ``{'name', 'score'}`` dicts from ``topic_stmt`` for ``_id``.

     ``formatName`` is applied to column 0 and ``formatScore`` to column 1 of
     every row.
     """
     def _as_topic(row):
         name = formatName(nth(row, 0))
         score = formatScore(nth(row, 1))
         return {'name': name, 'score': score}

     with newman_connector() as cnx:
         with execute_query(cnx.conn(), topic_stmt, _id) as qry:
             topics = []
             for row in qry.cursor():
                 topics.append(_as_topic(row))
             return topics
Esempio n. 50
0
def clean_string(sz, expr_list):
    """Run ``sz`` through each ``re.sub`` rule of ``expr_list`` sequentially.

    A rule provides the pattern at index 0 and the replacement at index 1,
    with ``' '`` handed to ``nth`` as the replacement fallback.
    """
    result = sz
    for expr in expr_list:
        pattern = nth(expr, 0)
        replacement = nth(expr, 1, ' ')
        result = re.sub(pattern, replacement, result)
    return result
Esempio n. 51
0
def getState(*args):
    """Read the ``.log`` file named after ``args[0]`` from ``work_dir``.

    The file contents are returned under ``log``.
    """
    email_addr = nth(args, 0)
    path_parts = (work_dir, email_addr)
    log_text = slurp("{}/{}.log".format(*path_parts))
    tangelo.content_type("application/json")
    return {'log': log_text}
Esempio n. 52
0
def isLeft(either):
    """Report whether element 0 of ``either`` (fetched by ``nth``) is falsy."""
    left_slot = nth(either, 0)
    if left_slot:
        return False
    return True