# Example #1
0
def reindexItems():
    """Re-index up to 10000 rows of the ``items`` column family.

    This is a generator that yields the results of ``db`` and
    ``search.solr`` calls; it is presumably meant to be driven by Twisted's
    ``inlineCallbacks`` (the decorator is not visible here -- TODO confirm).

    Items are skipped when:
      * they have no ``meta`` super column or no ``meta['owner']``;
      * the owner's org cannot be read from ``entities`` (logged);
      * they name a parent that is not among the fetched rows (logged).
    """
    fetchedItems = yield db.get_range_slice('items', count=10000, reverse=True)

    # Every row is converted up front because a child item is indexed
    # together with its parent, which may appear later in the result.
    items = dict((row.key, utils.supercolumnsToDict(row.columns))
                 for row in fetchedItems)

    log.msg("Total items:", len(fetchedItems))
    for i, row in enumerate(fetchedItems):
        itemId = row.key
        item = items[itemId]
        log.msg(i+1, itemId)

        meta = item.get('meta', {})
        if 'owner' not in meta:
            continue

        try:
            col = yield db.get(meta['owner'], "entities", "org", "basic")
            ownerOrgId = col.column.value
        except Exception:
            # ``Exception`` rather than a bare ``except``: a bare clause
            # would also swallow GeneratorExit raised at the ``yield`` when
            # the generator is closed, and then try to keep iterating.
            log.msg("Error when indexing:", itemId)
            continue

        parentId = meta.get('parent', None)
        if not parentId:
            yield search.solr.updateItemIndex(itemId, item, ownerOrgId)
            continue

        conv = items.get(parentId)
        if conv is None:
            # The parent was not part of the fetched slice; skip this one
            # item instead of aborting the whole run with a KeyError.
            log.msg("Error when indexing:", itemId, "missing parent:", parentId)
            continue
        yield search.solr.updateItemIndex(itemId, item, ownerOrgId, conv=conv)
# Example #2
0
def reindexProfileContent():
    """Update the people index for user entities that belong to an org.

    Generator: reads up to 1000 rows from ``entities``, logs each key, and
    for every entity whose ``basic['type']`` is ``'user'`` and whose
    ``basic['org']`` is non-empty yields
    ``search.solr.updatePeopleIndex(entityId, entity, orgId)``.
    """
    entityRows = yield db.get_range_slice('entities', count=1000)
    for entityRow in entityRows:
        entityId = entityRow.key
        log.msg(entityId)
        entity = Entity(entityId, utils.supercolumnsToDict(entityRow.columns))

        # Only users are indexed; other entity types are ignored.
        if entity.basic.get('type', '') != 'user':
            continue

        # A user without an org is not indexed.
        orgId = entity.basic.get('org', '')
        if not orgId:
            continue

        yield search.solr.updatePeopleIndex(entityId, entity, orgId)
# Example #3
0
def _stripAttachmentPrefix(attachments):
    """Return the 4-field values of *attachments* with the first field removed.

    *attachments* maps attachment id -> colon-separated string.  Only values
    with exactly four fields are returned (as ``"name:size:ftype"``); values
    in any other shape are left out, so the result is empty when there is
    nothing to rewrite.
    """
    trimmed = {}
    for attachmentId, value in attachments.items():
        fields = value.split(':')
        if len(fields) == 4:
            trimmed[attachmentId] = ':'.join(fields[1:])
    return trimmed


def updateData():
    """Migrate attachment data to the versioned-attachment layout.

    Generator yielding ``db`` call results (presumably consumed by
    Twisted's ``inlineCallbacks`` -- the decorator is not visible here).

    1. Every value in ``item_files`` (up to 1000 rows) has five
       colon-separated fields; the last four are written to
       ``attachmentVersions`` under the attachment id, using the original
       column name (named ``timeuuid`` here) as the column name.
    2. In ``items`` and then ``mConversations`` (up to 10000 rows each),
       four-field values in the ``attachments`` super column lose their
       first field.  A row is rewritten only if at least one of its values
       was converted, in which case the super column is removed and
       replaced by the converted values.

    Raises:
        ValueError: if an ``item_files`` value does not have exactly five
            colon-separated fields.
    """
    rows = yield db.get_range_slice('item_files', count=1000)
    for row in rows:
        attachments = utils.supercolumnsToDict(row.columns)
        for attachmentId in attachments:
            for timeuuid in attachments[attachmentId]:
                # The first field is dropped from the stored value.
                _, aid, name, size, ftype = attachments[attachmentId][timeuuid].split(':')
                yield db.insert(attachmentId, "attachmentVersions",
                                "%s:%s:%s:%s" % (aid, name, size, ftype), timeuuid)

    # Both column families get the same rewrite; only mConversations keys
    # are echoed to stdout, as before.
    for columnFamily, echoKeys in (('items', False), ('mConversations', True)):
        rows = yield db.get_range_slice(columnFamily, count=10000)
        for row in rows:
            key = row.key
            record = utils.supercolumnsToDict(row.columns)
            if echoKeys:
                print(key)
            attachments = _stripAttachmentPrefix(record.get('attachments', {}))
            if attachments:
                yield db.remove(key, columnFamily, super_column='attachments')
                yield db.batch_insert(key, columnFamily, {"attachments": attachments})
def updateData():
    """Run a one-off data migration over items, entities and groups.

    Generator yielding ``db``/``utils`` call results (presumably consumed by
    Twisted's ``inlineCallbacks`` -- the decorator is not visible here).

    Steps, in order:
      1. Check that the ``entityFeed_files`` column family exists, then
         truncate ``user_files``.
      2. For every item (up to 10000 rows) that has a ``meta`` super column:
         store the owner's org in ``meta['org']``, rewrite legacy ACLs on
         conversations, rebuild ``user_files`` / ``entityFeed_files`` from
         the item's attachments, and copy link/poll meta fields to their
         new names.
      3. Build ``feed_poll`` and ``userItems_poll`` entries for user
         entities and write them with a single ``batch_mutate``.
      4. Rename group access ``public``/``private`` to ``open``/``closed``.
      5. Re-key ``entityGroupsMap`` columns as ``<lowercased name>:<groupId>``.

    Raises:
        Exception: if the ``entityFeed_files`` column family is missing.
    """
    # Probe for the column family *before* truncating anything, so a failed
    # precondition no longer leaves ``user_files`` empty.
    try:
        yield db.get('asdf', 'entityFeed_files', uuid.uuid1().bytes)
    except ttypes.InvalidRequestException as exception:
        log.msg(exception)
        raise Exception('entityFeed_files CF missing, create the CF')
    except ttypes.NotFoundException:
        pass
    yield db.truncate('user_files')

    entities = {}
    items = {}

    rows = yield db.get_range_slice('items', count=10000, reverse=True)
    for row in rows:
        items[row.key] = utils.supercolumnsToDict(row.columns)

    for itemId in items:
        item = items[itemId]
        log.msg(itemId)
        if 'meta' not in item:
            continue

        meta = item['meta']
        # ``owner`` is resolved once per item.  Previously a missing owner
        # made the fallback below use the owner of the *previous* item (or
        # raise NameError on the first one).
        owner = meta.get('owner')
        isConversation = 'parent' not in meta

        # Add org to all items
        if owner is not None:
            try:
                col = yield db.get(owner, "entities", 'org', 'basic')
                yield db.insert(itemId, 'items', col.column.value, 'org', 'meta')
            except Exception:
                # Feedback items fall back to the owner id as their org;
                # presumably their owner has no org entry -- TODO confirm.
                if meta.get('type', '') == 'feedback':
                    yield db.insert(itemId, 'items', owner, 'org', 'meta')
                else:
                    log.msg('cannot set org for item', itemId)

        # Fix ACLs (conversations only).  The in-memory ``meta['acl']`` is
        # left untouched, so the file migration below still passes the
        # pre-migration value to ``utils.expandAcl``.
        if isConversation:
            acl = meta.get('acl')
            if acl is None:
                log.msg('conversation has no acl', itemId)
            elif acl == 'company':
                if owner is None:
                    log.msg('cannot fix company acl, item has no owner', itemId)
                else:
                    col = yield db.get(owner, "entities", "org", "basic")
                    companyAcl = pickle.dumps({"accept": {"orgs": [col.column.value]}})
                    yield db.insert(itemId, 'items', companyAcl, 'acl', 'meta')
            else:
                try:
                    # NOTE(review): pickle.loads executes whatever the stored
                    # value encodes; this is only safe if the database
                    # content is trusted.
                    unpacked = pickle.loads(acl)
                    if ('accept' in unpacked
                            and 'friends' in unpacked['accept']
                            and isinstance(unpacked['accept']['friends'], bool)):
                        del unpacked['accept']['friends']
                        yield db.insert(itemId, 'items', pickle.dumps(unpacked),
                                        'acl', 'meta')
                except Exception:
                    # Not a bare ``except``: that would also swallow
                    # GeneratorExit raised at the ``yield`` above.
                    log.msg('cannot unpack acl', acl)

        # Migrate files
        #    update user_files and entityFeed_files
        if owner is not None and 'attachments' in item:
            if owner not in entities:
                cols = yield db.get_slice(owner, 'entities', ['basic'])
                entities[owner] = utils.supercolumnsToDict(cols)
            for attachmentId in item['attachments']:
                orgId = entities[owner]['basic']['org']
                timeuuid, name = item['attachments'][attachmentId].split(':')[:2]
                timeuuid = utils.decodeKey(timeuuid)
                val = '%s:%s:%s:%s' % (attachmentId, name, itemId, owner)
                yield db.insert(owner, "user_files", val, timeuuid)
                if isConversation and meta.get('acl', ''):
                    _entities = yield utils.expandAcl(owner, orgId, meta['acl'],
                                                      itemId, owner, True)
                    for entityId in _entities:
                        yield db.insert(entityId, "entityFeed_files", val, timeuuid)

        # Migrate items
        # Meta fields in "link", "event" and "poll"
        itemType = meta.get('type', None)
        if itemType in ('link', 'poll', 'event'):
            updated = {}

            if itemType == 'link':
                # Each link field is copied to a "link_"-prefixed name.
                for field in ('url', 'title', 'summary', 'imgSrc', 'embedType',
                              'embedSrc', 'embedHeight', 'embedWidth'):
                    if field in meta:
                        updated['link_' + field] = meta[field]
            elif itemType == 'poll':
                if 'question' in meta:
                    updated['comment'] = meta['question']
            else:
                # Events are only reported; nothing is rewritten for them.
                print('Found an event: %s' % (itemId,))

            if updated:
                yield db.batch_insert(itemId, 'items', {'meta': updated})

    #
    # Create poll indexes for feed and userItems
    #
    rows = yield db.get_range_slice('entities', count=10000, reverse=True)
    mutations = {}
    for row in rows:
        entityId = row.key
        entity = utils.supercolumnsToDict(row.columns)

        if entity.get('basic', {}).get('type') != 'user':
            continue

        # Both reads are started before either result is awaited.
        d1 = db.get_slice(entityId, 'feed', count=10000)
        d2 = db.get_slice(entityId, 'userItems', count=10000)

        results = yield d1
        for col in results:
            value = col.column.value
            # Only items loaded above can be recognised as polls.
            if items.get(value, {}).get('meta', {}).get('type', '') == 'poll':
                mutations.setdefault(entityId, {}).setdefault('feed_poll', {})[col.column.name] = value

        results = yield d2
        for col in results:
            value = col.column.value
            # userItems values have at least five colon-separated fields;
            # the fourth one is the conversation type.
            _, _, _, convType, _ = value.split(':', 4)
            if convType == 'poll':
                mutations.setdefault(entityId, {}).setdefault('userItems_poll', {})[col.column.name] = value
    if mutations:
        yield db.batch_mutate(mutations)

    #Group type changed from public-private to open-closed.
    rows = yield db.get_range_slice('entityGroupsMap', count=1000)
    groupIds = set()
    for row in rows:
        for col in row.columns:
            # Keys are "<name>:<groupId>"; split from the right so a colon
            # inside the group name does not break the unpacking (assumes
            # group ids never contain a colon -- TODO confirm).
            _, groupId = col.column.name.rsplit(':', 1)
            groupIds.add(groupId)

    cols = yield db.multiget_slice(groupIds, "entities")
    groups = utils.multiSuperColumnsToDict(cols)
    renamedAccess = {'public': 'open', 'private': 'closed'}
    for groupId in groups:
        access = groups[groupId]['basic']['access'].lower()
        if access in renamedAccess:
            yield db.insert(groupId, 'entities', renamedAccess[access], 'access', 'basic')

    #Fix entityGroupsMap
    rows = yield db.get_range_slice('entityGroupsMap', count=1000)
    for row in rows:
        entityId = row.key
        for col in row.columns:
            _, groupId = col.column.name.rsplit(':', 1)
            expectedName = '%s:%s' % (groups[groupId]['basic']['name'].lower(), groupId)
            if col.column.name != expectedName:
                yield db.remove(entityId, 'entityGroupsMap', col.column.name)
                yield db.insert(entityId, 'entityGroupsMap', '', expectedName)
# Example #5
0
def getNewUserCount(startDate, endDate, count=100, column_count=100, mail_to=''):
    """Collect signup and activity statistics for a date range.

    Generator yielding ``db``/``utils`` call results (presumably consumed by
    Twisted's ``inlineCallbacks`` -- the decorator is not visible here).

    It pages through ``domainOrgMap``, ``orgUsers`` and ``userItems`` and
    builds a ``stats`` dict with:
      * ``stats["<startDate> <endDate>"]``: ``newDomains``,
        ``newDomainCount`` and ``signups`` for the range;
      * ``stats["domain"]``: an ``OrderedDict`` keyed by the org's
        comma-joined domain names (or by the org id when the org has no
        domain, e.g. the ``'no-org'`` bucket for users without an org),
        ordered by the timestamp of the org's first domain, each holding
        ``newUsers``, ``totalUsers``, ``newItems`` and ``items``.

    Column timestamps are treated as microseconds since the epoch and are
    compared against ``[startDate, endDate)`` in local time.

    Args:
        startDate: start of the range (inclusive), in ``dateFormat``.
        endDate: end of the range (exclusive), in ``dateFormat``.
        count: number of rows processed per page.
        column_count: number of columns processed per page of a row.
        mail_to: recipients of the report.  When empty the stats are
            pretty-printed instead.  NOTE(review): it is iterated as-is, so
            a plain string would be mailed character by character --
            confirm callers pass a list.

    Raises:
        Exception: if ``endDate`` is not later than ``startDate``.
    """
    frm_to = startDate + ' ' + endDate
    startDate = datetime.datetime.strptime(startDate, dateFormat)
    endDate = datetime.datetime.strptime(endDate, dateFormat)
    if endDate <= startDate:
        log.msg("end-date should be later than start-date")
        raise Exception("end-date should be later than start-date")

    startTime = time.mktime(startDate.timetuple())
    endTime = time.mktime(endDate.timetuple())

    # One extra row/column is fetched per page: it is not processed, it only
    # tells whether another page exists and where that page starts.
    toFetchCount = count + 1
    toFetchColumnCount = column_count + 1

    data = {}          # orgId -> {"domain": [(domain, ts)], "users": {userId: counters}}
    new_users = {}     # orgId -> set of user ids that joined within the range
    usersOrgMap = {}   # userId -> orgId
    totalUsers = {}    # orgId -> number of users that joined before endTime

    def _seconds(col):
        """Return the column's write timestamp in seconds."""
        return col.column.timestamp / 1000000.0

    def _recordUsers(orgId, columns):
        """Register the users in *columns* as members of *orgId*."""
        for col in columns:
            userId = col.column.name
            usersOrgMap[userId] = orgId
            data.setdefault(orgId, {}).setdefault("users", {}).setdefault(
                userId, {"newItems": 0, "items": 0})
            joined = _seconds(col)
            if joined < endTime:
                totalUsers[orgId] += 1
                if joined >= startTime:
                    # setdefault here (not a plain lookup): the org may have
                    # had no new user on any earlier page of columns.
                    new_users.setdefault(orgId, set()).add(userId)

    def _tallyItems(counters, columns):
        """Add the items in *columns* to a user's ``counters``."""
        for col in columns:
            created = _seconds(col)
            if created < endTime:
                counters['items'] += 1
                if created >= startTime:
                    counters['newItems'] += 1

    #
    # Domains: which domains map to which org, and which ones are new.
    #
    new_domains = []
    start = ''
    while 1:
        domains = yield db.get_range_slice('domainOrgMap',
                                            count=toFetchCount,
                                            start=start)

        for row in domains[:count]:
            domain = row.key
            # NOTE(review): columns are capped with ``count`` (not
            # ``column_count``) and this query sets no column_count; kept
            # as found -- confirm the intent.
            for col in row.columns[:count]:
                created = _seconds(col)
                orgDomains = data.setdefault(col.column.name, {}).setdefault("domain", [])
                if domain not in [known for known, _ in orgDomains]:
                    orgDomains.append((domain, created))
                if startTime <= created < endTime and domain not in new_domains:
                    new_domains.append(domain)

        if len(domains) < toFetchCount:
            break
        start = domains[-1].key
    stats = {frm_to: {"newDomains": new_domains, "newDomainCount": len(new_domains)}}

    #
    # Users: membership of every org, paging through wide rows.
    #
    start = ''
    totalNewUsers = 0
    while 1:
        users = yield db.get_range_slice('orgUsers',
                                         start=start,
                                         count=toFetchCount,
                                         column_count=toFetchColumnCount)
        for row in users[:count]:
            orgId = row.key
            totalUsers[orgId] = 0
            columns = row.columns
            _recordUsers(orgId, columns[:column_count])
            # A full page means there may be more columns; the unprocessed
            # last column is where the next page starts.
            while len(columns) == toFetchColumnCount:
                page = yield db.get_range_slice('orgUsers',
                                                count=1,
                                                start=orgId,
                                                column_start=columns[-1].column.name,
                                                column_count=toFetchColumnCount)
                if not page:
                    break
                columns = page[0].columns
                _recordUsers(orgId, columns[:column_count])
            totalNewUsers += len(new_users.get(orgId, ()))
        if len(users) < toFetchCount:
            break
        start = users[-1].key

    stats[frm_to]["signups"] = totalNewUsers

    #
    # Items: per-user item counts, attributed to the user's org.
    #
    start = ''
    while 1:
        rows = yield db.get_range_slice('userItems',
                                        start=start,
                                        count=toFetchCount,
                                        column_count=toFetchColumnCount)
        for row in rows[:count]:
            columns = row.columns
            if not columns:
                # Rows without columns contribute nothing and create no entry.
                continue
            userId = row.key
            orgId = usersOrgMap.get(userId, 'no-org')
            # setdefault at every level, so users without an org accumulate
            # in the shared 'no-org' bucket instead of overwriting it.
            counters = data.setdefault(orgId, {}).setdefault('users', {}).setdefault(
                userId, {'items': 0, 'newItems': 0})
            _tallyItems(counters, columns[:column_count])
            while len(columns) == toFetchColumnCount:
                page = yield db.get_range_slice('userItems', count=1,
                                                start=userId,
                                                column_start=columns[-1].column.name,
                                                column_count=toFetchColumnCount)
                if not page:
                    break
                columns = page[0].columns
                _tallyItems(counters, columns[:column_count])

        if len(rows) < toFetchCount:
            break
        start = rows[-1].key

    def _firstDomainTime(orgId):
        """Sort key: timestamp of the org's first domain; no domain sorts last."""
        orgDomains = data[orgId].get("domain")
        if orgDomains:
            return orgDomains[0][1]
        return float('inf')

    stats["domain"] = OrderedDict()
    for orgId in sorted(data, key=_firstDomainTime):
        orgDomains = data[orgId].get('domain')
        if orgDomains:
            domainName = ",".join([x[0] for x in orgDomains])
        else:
            domainName = orgId
        userCounters = list(data[orgId].get('users', {}).values())
        stats["domain"][domainName] = {
            "newUsers": len(new_users.get(orgId, ())),
            "totalUsers": totalUsers.get(orgId, 0),
            "newItems": sum([c['newItems'] for c in userCounters]),
            "items": sum([c['items'] for c in userCounters]),
        }

    if not mail_to:
        # pprint.pprint() prints by itself and returns None; wrapping it in
        # a print statement printed a stray "None" after the stats.
        pprint.pprint(stats)
        return

    subject = "Stats: %s to %s" % (startDate.strftime(dateFormat), endDate.strftime(dateFormat))
    textPart = repr(stats)
    rootUrl = config.get('General', 'URL')
    brandName = config.get('Branding', 'Name')
    htmlPart = getBlock("emails.mako", "html_stats", stats=stats, frm_to=frm_to,
                        rootUrl=rootUrl, brandName=brandName)
    for mailId in mail_to:
        yield utils.sendmail(mailId, subject, textPart, htmlPart)
def migrateFriendsToFollowers():
    """Convert the friends model to followers/subscriptions and clean up.

    Generator yielding ``db`` call results (presumably consumed by Twisted's
    ``inlineCallbacks`` -- the decorator is not visible here).

    Steps, in order:
      1. Copy every row of ``connections`` into ``followers`` and
         ``subscriptions``.
      2. Remove the ``displayNameIndex`` and ``nameIndex`` rows of all users.
      3. Change the ``subType`` of ``connection`` activity items to
         ``following``.
      4. Remove pending requests whose column name does not start with
         ``'G'`` from ``pendingConnections``.
      5. Remove ``people`` from ``latest`` for all users.
      6. Remove ``:FA`` notification items and the matching entries in
         ``notifications`` / ``latest``.
      7. Drop the ``connections`` and ``connectionsByTag`` column families.

    Each range read is limited to 10000 rows.
    """
    # Migrate all friends to followers/subscriptions.
    connectionRows = yield db.get_range_slice('connections', count=10000)
    for connectionRow in connectionRows:
        userId = connectionRow.key
        friends = [x.super_column.name for x in connectionRow.columns]
        if not friends:
            # Nothing to copy for this row; skip the empty batch calls.
            continue
        yield db.batch_insert(userId, "followers", dict.fromkeys(friends, ''))
        yield db.batch_mutate(dict((x, {'subscriptions': {userId: ''}}) for x in friends))
    log.msg('>>>>>>>> Converted all connections to following.')

    # Remove name indices of friends
    entityRows = yield db.get_range_slice('entities', count=10000, names=['basic'])
    userIds = []
    for entityRow in entityRows:
        entity = utils.supercolumnsToDict(entityRow.columns)
        # Rows without a 'basic' super column or a type are not users.
        if entity.get('basic', {}).get('type') == 'user':
            userIds.append(entityRow.key)
    for userId in userIds:
        yield db.remove(userId, 'displayNameIndex')
        yield db.remove(userId, 'nameIndex')
    log.msg('>>>>>>>> Removed name indices for friends.')

    # Convert all "connection" activity to "follow".
    # We already have two separate items, so subtype conversion should be good.
    itemRows = yield db.get_range_slice('items', count=10000, names=['meta'])
    connectionItems = []
    for itemRow in itemRows:
        meta = utils.supercolumnsToDict(itemRow.columns).get('meta', {})
        if meta.get('type', '') == 'activity' and meta.get('subType') == 'connection':
            connectionItems.append(itemRow.key)
    if connectionItems:
        yield db.batch_mutate(dict((x, {'items': {'meta': {'subType': 'following'}}})
                                   for x in connectionItems))
    log.msg('>>>>>>>> All connection items converted to following.')

    # Remove all friend requests from pendingConnections
    pendingRows = yield db.get_range_slice('pendingConnections', count=10000)
    for pendingRow in pendingRows:
        userId = pendingRow.key
        pendingFriendRequestIds = [x.column.name for x in pendingRow.columns
                                   if not x.column.name.startswith('G')]
        if pendingFriendRequestIds:
            yield db.batch_remove({'pendingConnections': [userId]}, names=pendingFriendRequestIds)
    log.msg('>>>>>>>> Removed pending friend requests.')

    # Remove all friend requests from latest
    # NOTE(review): ``names`` is a plain string here but a list in the
    # pendingConnections call above -- confirm batch_remove accepts both.
    if userIds:
        yield db.batch_remove({'latest': userIds}, names='people')
    log.msg('>>>>>>>> Removed friend requests from latest.')

    # Remove all friend-request-accepted notifications
    notifyMutations = {}
    for userId in userIds:
        acceptedItems = yield db.get_slice(userId, "notificationItems", super_column=':FA')
        if acceptedItems:
            names = [col.column.name for col in acceptedItems]
            colmap = dict.fromkeys(names)
            # The timestamp is in microseconds; it is passed as an integer
            # because Thrift serialises it as a 64-bit integer.
            deletion = Deletion(int(time.time() * 1000000), 'notifications',
                                SlicePredicate(column_names=names))
            notifyMutations[userId] = {'notifications': colmap, 'latest': [deletion]}
            yield db.remove(userId, 'notificationItems', super_column=':FA')
    if notifyMutations:
        yield db.batch_mutate(notifyMutations)
    log.msg('>>>>>>>> Removed friend notifications from notifications and latest.')

    # Finally, remove the connections column family.
    yield db.system_drop_column_family('connections')
    yield db.system_drop_column_family('connectionsByTag')
    log.msg('>>>>>>>> Removed the connections column family.')