# Example no. 1
# 0
def post_actions(seq, h=1, adres_slice=3):
    """Tag every event with an n-gram based action and bulk-update the index.

    For each key of ``seq`` the events are reduced to the distinct
    ``(event[0][:adres_slice], event[1])`` pairs, ordered by ``event[1]``.
    The first components of those pairs form the session handed to
    ``Ngrams`` and ``actions``. The tag computed for a distinct pair is then
    written to every event that produced that pair, as an ``update`` action
    sent through ``bulk``.

    Relies on the module-level names ``Ngrams``, ``actions``, ``bulk`` and
    ``es``.

    Args:
        seq: Mapping of session key to a sequence of events. Each event is
            indexable: ``event[0]`` is sliceable and its slice hashable,
            ``event[1]`` is the ordering value (presumably a timestamp --
            confirm against the caller), ``event[2]`` is used as the
            document ``_id``.
        h: The ``n`` passed to ``Ngrams``. Sessions with ``h`` or fewer
            events are not tagged.
        adres_slice: Length of the prefix of ``event[0]`` kept for each
            event.

    Returns:
        None. The result is the set of update actions sent via ``bulk``.
    """
    # An update batch is flushed as soon as it holds more than this many
    # actions (same threshold as before).
    flush_threshold = 10000

    sessions = dict()
    distinct_pairs = dict()

    for key in seq:
        ordered = sorted(
            ((event[0][:adres_slice], event[1]) for event in seq[key]),
            key=lambda pair: pair[1])
        # De-duplicate in a single ordered pass. Sorting a set and a Counter
        # independently does not guarantee the same order for pairs that
        # share an ordering value, which could misalign tags and events.
        seen = set()
        pairs = []
        for pair in ordered:
            if pair not in seen:
                seen.add(pair)
                pairs.append(pair)
        distinct_pairs[key] = pairs
        sessions[key] = [pair[0] for pair in pairs]

    # ngrams
    print(str(h) + 'grams')
    ng = Ngrams(sessions=sessions, n=h)

    # price-list
    # NOTE(review): this parses as (score * 10**len(gram)) - 1. It may have
    # been meant as score * 10**(len(gram) - 1); the ordering of tag_list is
    # the same either way, only the stored values differ. Kept unchanged.
    vals = [(gram, score * 10**(len(gram)) - 1)
            for gram, score in ng.ngrams.items()]
    tag_list = sorted(set(vals), key=lambda x: x[1], reverse=True)

    # tagging
    res = actions(sessions, tag_list, ng)

    act = []
    for key in seq:
        if len(seq[key]) <= h:
            continue

        # res[key][i] is the tag of the i-th distinct pair; index it by the
        # pair so each event finds its own tag regardless of the order in
        # which seq[key] is stored.
        tag_of = {}
        for pair, tag in zip(distinct_pairs[key], res[key]):
            # A lone (-1,) carries no separator; pad it to two fields.
            tag_of[pair] = (-1, -1) if tag == (-1, ) else tag

        for event in seq[key]:
            tag = tag_of[(event[0][:adres_slice], event[1])]
            act.append({
                '_op_type': 'update',
                u'_id': event[2],
                u'_type': u'auditd-parent',
                u'_index': u'wailt',
                'doc': {
                    'action': tag[0],
                    'action_separator': tag[1]
                }
            })
            if len(act) > flush_threshold:
                print('acted')
                bulk(es, actions=act)
                act = []

    # Send whatever is left; nothing to send when the buffer is empty.
    if act:
        bulk(es, actions=act)