Ejemplo n.º 1
0
def migrate(dry_run=True):
    """Convert primary-institution logs and nodes to affiliated-institution form.

    Logs whose action equals ``PRIMARY_INSTITUTION_CHANGED`` or
    ``PRIMARY_INSTITUTION_REMOVED`` are relabelled with the corresponding
    ``NodeLog.AFFILIATED_INSTITUTION_*`` action and saved.  Every node whose
    ``primary_institution`` is not ``None`` then gets that institution
    appended to ``affiliated_institutions`` (when not already present), has
    ``primary_institution`` cleared, and is saved.

    :param dry_run: when true, ``RuntimeError`` is raised after all saves have
        been issued.  Presumably the caller runs this inside a transaction so
        the raise rolls everything back -- confirm at the call site.
    :raises RuntimeError: if ``dry_run`` is true.
    """
    for added in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_CHANGED)):
        message = 'Log with id <{}> being updated for affiliation added'
        logger.info(message.format(added._id))
        added.action = NodeLog.AFFILIATED_INSTITUTION_ADDED
        added.save()

    for removed in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_REMOVED)):
        message = 'Log with id <{}> being updated for affiliation removed'
        logger.info(message.format(removed._id))
        removed.action = NodeLog.AFFILIATED_INSTITUTION_REMOVED
        removed.save()

    for target in Node.find(Q('primary_institution', 'ne', None)):
        message = 'Node with id <{}> and title <{}> being updated'
        logger.info(message.format(target._id, target.title))
        institution = target.primary_institution
        already_affiliated = institution in target.affiliated_institutions
        if not already_affiliated:
            target.affiliated_institutions.append(institution)
        target.primary_institution = None
        target.save()

    if dry_run:
        raise RuntimeError('Dry run, transaction rolled back.')
Ejemplo n.º 2
0
def migrate(dry_run=True):
    """Backfill ``params['preprint']`` on preprint NodeLogs that lack it.

    Selects logs whose action is ``PREPRINT_FILE_UPDATED`` or
    ``PREPRINT_INITIATED`` and that have no ``params.preprint``, looks up the
    ``PreprintService`` for the log's ``params['node']``, stores the
    preprint's ``_id`` under ``params['preprint']`` and saves the log.  Logs
    whose node has no preprint are reported and skipped.

    :param dry_run: accepted but never read here; saves are always issued.
        NOTE(review): presumably the caller is responsible for rolling back
        on a dry run -- confirm.
    """
    preprint_actions = [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED]
    pending = list(NodeLog.find(
        Q("action", "in", preprint_actions) & Q("params.preprint", "exists", False)
    ))
    logger.info("Preparing to migrate {} NodeLogs".format(len(pending)))

    migrated = 0
    for entry in pending:
        node_id = entry.params.get("node")
        try:
            service = PreprintService.find_one(Q("node", "eq", node_id))
        except NoResultsFound:
            logger.error("Skipping {}, preprint not found for node: {}".format(entry._id, node_id))
            continue

        logger.info("Migrating log - {} - to add params.preprint: {}, ".format(entry._id, service._id))
        entry.params["preprint"] = service._id
        entry.save()
        migrated += 1

    logger.info("Migrated {} logs".format(migrated))
def do_migration(records, dry=False):
    """Prepend historical logs to each node in ``records``.

    For every node, the NodeLogs whose ``was_connected_to`` contains the node
    are collected.  A log that has a logged node which is not among
    ``get_all_parents(node)`` is excluded; logs with no logged node are kept.
    The remaining logs are placed in front of the node's existing logs,
    ``SYSTEM_TAG`` is appended to ``node.system_tags`` and, unless ``dry`` is
    true, the node is saved inside a ``TokuTransaction``.

    The previous implementation removed items from ``logs`` while iterating
    over that same list, which makes the iterator skip the element following
    every removal; the filtered list is now built separately.

    :param records: iterable of nodes to update.
    :param dry: when true the node is modified in memory but not saved.
    """
    for node in records:
        candidates = list(NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs
        # The parent chain depends only on the node, so resolve it once
        # rather than once per candidate log (assumes get_all_parents has no
        # side effects -- confirm).
        parents = get_all_parents(node)

        logs = []
        for log in candidates:
            # if the log_node is not contained in the node parent list then it doesn't belong to this node
            if log.node__logged and log.node__logged[0] not in parents:
                logger.info(
                    'Excluding log {} from list because it is not associated with node {}'
                    .format(log, node))
                continue
            logs.append(log)

        with TokuTransaction():
            node.logs = logs + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(
                len(logs), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    # Best effort: report the failure and move on to the next node.
                    logger.error(
                        'Could not update logs for node {} due to error'.
                        format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
def do_migration(records, dry=False):
    """Prepend historical logs to each node in ``records``.

    For every node, the NodeLogs whose ``was_connected_to`` contains the node
    are collected.  A log that has a logged node which is not among
    ``get_all_parents(node)`` is excluded; logs with no logged node are kept.
    The remaining logs are placed in front of the node's existing logs,
    ``SYSTEM_TAG`` is appended to ``node.system_tags`` and, unless ``dry`` is
    true, the node is saved inside a ``TokuTransaction``.

    Filtering builds a new list instead of calling ``logs.remove`` during
    iteration, because removing from the list being iterated skips the
    element that follows each removed one.

    :param records: iterable of nodes to update.
    :param dry: when true the node is modified in memory but not saved.
    """
    for node in records:
        candidates = list(NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs
        # Loop-invariant: computed once per node (assumes get_all_parents is
        # free of side effects -- confirm).
        parents = get_all_parents(node)

        logs = []
        for log in candidates:
            # if the log_node is not contained in the node parent list then it doesn't belong to this node
            if log.node__logged and log.node__logged[0] not in parents:
                logger.info('Excluding log {} from list because it is not associated with node {}'.format(log, node))
                continue
            logs.append(log)

        with TokuTransaction():
            node.logs = logs + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(len(logs), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    # Best effort: report the failure and continue with the next node.
                    logger.error('Could not update logs for node {} due to error'.format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
Ejemplo n.º 5
0
def migrate(dry_run=True):
    """Add the preprint id to preprint NodeLogs that do not carry one yet.

    Logs with action ``PREPRINT_FILE_UPDATED`` or ``PREPRINT_INITIATED`` and
    no ``params.preprint`` are matched to the ``PreprintService`` found for
    ``params['node']``; the preprint ``_id`` is written to
    ``params['preprint']`` and the log saved.  Logs without a matching
    preprint are logged as errors and left untouched.

    :param dry_run: not consulted by this function; every matched log is
        saved.  NOTE(review): rollback on dry run is presumably handled by
        the caller -- confirm.
    """
    wanted_actions = [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED]
    selector = Q('action', 'in', wanted_actions) & Q('params.preprint', 'exists', False)
    to_migrate = list(NodeLog.find(selector))

    logger.info('Preparing to migrate {} NodeLogs'.format(len(to_migrate)))

    done = 0
    for node_log in to_migrate:
        node_id = node_log.params.get('node')
        try:
            found = PreprintService.find_one(Q('node', 'eq', node_id))
        except NoResultsFound:
            logger.error('Skipping {}, preprint not found for node: {}'.format(node_log._id, node_id))
            continue

        logger.info('Migrating log - {} - to add params.preprint: {}, '.format(node_log._id, found._id))
        node_log.params['preprint'] = found._id
        node_log.save()
        done += 1

    logger.info('Migrated {} logs'.format(done))
Ejemplo n.º 6
0
def user_last_log(user, query=None):
    """Return the ``date`` of the last NodeLog found for ``user``.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :param query: optional extra query; when truthy it is AND-ed with the
        user filter.
    :return: ``date`` of the result at index ``count() - 1``.
    """
    user_filter = Q('user', 'eq', user._id)
    if not query:
        query = user_filter
    else:
        query &= user_filter

    matches = NodeLog.find(query)
    last_index = matches.count() - 1
    return matches[last_index].date
Ejemplo n.º 7
0
def find_invalid_logs():
    """Yield wiki-deleted logs dated earlier than their ObjectId timestamp.

    For each NodeLog with action ``NodeLog.WIKI_DELETED`` the generation time
    embedded in its ``_id`` is converted to a naive datetime with the UTC
    offset subtracted; the log is yielded when that value is greater than
    ``log.date``.
    """
    wiki_deleted = NodeLog.find(Q('action', 'eq', NodeLog.WIKI_DELETED))
    for entry in wiki_deleted:
        # Derive UTC datetime object from ObjectId
        generated = ObjectId(entry._id).generation_time
        created_utc = generated.replace(tzinfo=None) - generated.utcoffset()

        if not created_utc > entry.date:
            continue
        yield entry
Ejemplo n.º 8
0
def count_user_logs(user):
    """Count the NodeLogs of ``user``, with one adjustment at the threshold.

    When the count equals ``LOG_THRESHOLD`` and the first log is a
    ``'project_created'`` entry on a bookmark collection node, that log is
    not counted.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :return: the (possibly adjusted) number of logs.
    """
    user_logs = NodeLog.find(Q('user', 'eq', user._id))
    total = user_logs.count()
    if total != LOG_THRESHOLD:
        return total

    first = user_logs[0]
    if first.action == 'project_created' and first.node.is_bookmark_collection:
        return total - 1
    return total
Ejemplo n.º 9
0
def user_last_log(user, query=None):
    """Return the date of the final NodeLog in the result set for ``user``.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :param query: optional extra query, AND-ed with the user filter when truthy.
    :return: the ``date`` attribute of the result at position ``count() - 1``.
    """
    if query:
        query &= Q('user', 'eq', user._id)
    else:
        query = Q('user', 'eq', user._id)

    results = NodeLog.find(query)
    final_position = results.count() - 1
    final_log = results[final_position]
    return final_log.date
Ejemplo n.º 10
0
def count_user_logs(user):
    """Return how many NodeLogs belong to ``user``.

    If the count is exactly ``LOG_THRESHOLD`` and the first log records
    ``'project_created'`` on a node flagged ``is_bookmark_collection``, the
    count is reduced by one.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :return: the resulting count.
    """
    found = NodeLog.find(Q('user', 'eq', user._id))
    count = found.count()
    at_threshold = count == LOG_THRESHOLD
    if at_threshold:
        head = found[0]
        is_bookmark_creation = (
            head.action == 'project_created'
            and head.node.is_bookmark_collection
        )
        if is_bookmark_creation:
            count = count - 1
    return count
Ejemplo n.º 11
0
def find_invalid_logs():
    """Generate ``WIKI_DELETED`` logs whose ``date`` precedes their id timestamp.

    The timestamp is taken from ``ObjectId(log._id).generation_time``, made
    naive, and shifted by its own UTC offset before being compared with
    ``log.date``.
    """
    query = Q('action', 'eq', NodeLog.WIKI_DELETED)
    for candidate in NodeLog.find(query):
        # Derive UTC datetime object from ObjectId
        stamp = ObjectId(candidate._id).generation_time
        naive_stamp = stamp.replace(tzinfo=None)
        naive_utc = naive_stamp - stamp.utcoffset()

        if naive_utc > candidate.date:
            yield candidate
Ejemplo n.º 12
0
def count_user_logs(user, query=None):
    """Count NodeLogs for ``user``, ignoring a leading dashboard-creation log.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :param query: optional extra query, AND-ed with the user filter when truthy.
    :return: number of matching logs, minus one when the first log is a
        ``'project_created'`` entry on a node with ``is_dashboard`` set.
    """
    user_filter = Q('user', 'eq', user._id)
    if not query:
        query = user_filter
    else:
        query &= user_filter

    matched = NodeLog.find(query)
    total = matched.count()
    if total <= 0:
        return total

    first = matched[0]
    if first.action == 'project_created' and first.node.is_dashboard:
        return total - 1
    return total
Ejemplo n.º 13
0
def count_user_logs(user, query=None):
    """Return the number of NodeLogs for ``user`` under an optional query.

    The first matching log is discounted when it is a ``'project_created'``
    entry whose node has ``is_dashboard`` set.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :param query: optional extra query, AND-ed with the user filter when truthy.
    :return: the (possibly reduced) count.
    """
    if query:
        query &= Q('user', 'eq', user._id)
    else:
        query = Q('user', 'eq', user._id)

    results = NodeLog.find(query)
    count = results.count()
    has_results = count > 0
    if has_results:
        head = results[0]
        dashboard_created = head.action == 'project_created' and head.node.is_dashboard
        if dashboard_created:
            count = count - 1
    return count
Ejemplo n.º 14
0
def main(dry):
    """Attach preprint information to existing preprint NodeLogs.

    ``PREPRINT_INITIATED`` logs dated before the start of the run get
    ``params['preprint'] = {'id': ...}`` and
    ``params['service'] = {'name': ...}``; ``PREPRINT_FILE_UPDATED`` logs
    get only ``params['preprint']``.  The preprint is the ``PreprintService``
    found for the log's node.  Logs whose node has no preprint are skipped.

    :param dry: when true, params are updated in memory and the change is
        logged, but ``log.save()`` is not called.
    """
    if dry:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning('DRY mode running')
    now = datetime.utcnow()
    initiated_logs = NodeLog.find(
        Q('action', 'eq', NodeLog.PREPRINT_INITIATED) & Q('date', 'lt', now))
    for log in initiated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({
                'preprint': {
                    'id': preprint._id
                },
                'service': {
                    'name': preprint.provider.name
                }
            })
            logging.info(
                'Updating log {} from node {}, with preprint id: {}'.format(
                    log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Deliberate best effort: a log whose node has no preprint is
            # left unchanged.
            pass

    updated_logs = NodeLog.find(
        Q('action', 'eq', NodeLog.PREPRINT_FILE_UPDATED)
        & Q('date', 'lt', now))
    for log in updated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({'preprint': {'id': preprint._id}})
            logging.info(
                'Updating log {} from node {}, with preprint id: {}'.format(
                    log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Deliberate best effort: a log whose node has no preprint is
            # left unchanged.
            pass
Ejemplo n.º 15
0
def migrate(dry_run=True):
    """Replace primary-institution data with affiliated-institution data.

    Steps, in order:

    1. logs with action ``PRIMARY_INSTITUTION_CHANGED`` are saved with
       ``NodeLog.AFFILIATED_INSTITUTION_ADDED``;
    2. logs with action ``PRIMARY_INSTITUTION_REMOVED`` are saved with
       ``NodeLog.AFFILIATED_INSTITUTION_REMOVED``;
    3. nodes with a non-``None`` ``primary_institution`` have it added to
       ``affiliated_institutions`` if missing, then cleared, then saved.

    :param dry_run: when true, ``RuntimeError`` is raised once all steps have
        run (presumably to abort an enclosing transaction -- confirm).
    :raises RuntimeError: if ``dry_run`` is true.
    """
    changed_query = Q('action', 'eq', PRIMARY_INSTITUTION_CHANGED)
    for entry in NodeLog.find(changed_query):
        note = 'Log with id <{}> being updated for affiliation added'.format(entry._id)
        logger.info(note)
        entry.action = NodeLog.AFFILIATED_INSTITUTION_ADDED
        entry.save()

    removed_query = Q('action', 'eq', PRIMARY_INSTITUTION_REMOVED)
    for entry in NodeLog.find(removed_query):
        note = 'Log with id <{}> being updated for affiliation removed'.format(entry._id)
        logger.info(note)
        entry.action = NodeLog.AFFILIATED_INSTITUTION_REMOVED
        entry.save()

    with_primary = Node.find(Q('primary_institution', 'ne', None))
    for current in with_primary:
        note = 'Node with id <{}> and title <{}> being updated'.format(current._id, current.title)
        logger.info(note)
        primary = current.primary_institution
        if primary not in current.affiliated_institutions:
            current.affiliated_institutions.append(primary)
        current.primary_institution = None
        current.save()

    if not dry_run:
        return
    raise RuntimeError('Dry run, transaction rolled back.')
Ejemplo n.º 16
0
def get_targets():
    """Return the NodeLogs for a fixed set of registration-related actions.

    These logs are potentially missing params['registration'] fields.
    Params['node'] and original_node fields may incorrectly be pointing to
    the registration instead of the node.
    """
    actions = (
        'registration_cancelled',
        'retraction_approved',
        'retraction_cancelled',
        'embargo_approved',
        'embargo_cancelled',
        'embargo_terminated',
    )
    # OR the per-action clauses together left to right.
    combined = None
    for action in actions:
        clause = Q('action', 'eq', action)
        combined = clause if combined is None else combined | clause
    return NodeLog.find(combined)
Ejemplo n.º 17
0
def get_or_create_node(node_id, sqlite_db):
    """Gets an OSF node from the sqlite cache.  If not found, pulls the node info from mongo and
    saves it.

    The cache lookup uses a bound parameter instead of string formatting, so
    ids containing quotes (or SQL) are matched literally rather than being
    spliced into the statement.

    When the most recent MADE_PUBLIC / MADE_PRIVATE log of the node is
    MADE_PUBLIC, its date is stored as ``YYYY-MM-DDTHH:MM:SS.mmmZ``.  The
    timestamp is formatted explicitly because ``isoformat()`` omits the
    fractional part when ``microsecond`` is 0, which made the previous
    ``[:-3]`` slice cut into the seconds field.

    :param node_id: OSF node id (e.g. 'mst3k')
    :param sqlite_db: SQLite3 database handle
    :return: the cached row for the node, or None if ``node_id`` is None or
        no such node can be loaded
    :raises Exception: if the cache holds more than one row for ``node_id``
    """

    if node_id is None:
        return None

    cursor = sqlite_db.cursor()
    cursor.execute('SELECT * FROM nodes WHERE id=?', (node_id,))
    nodes = cursor.fetchall()

    if len(nodes) > 1:
        raise Exception("Multiple nodes found for single node ID")

    if nodes:
        return nodes[0]

    node = Node.load(node_id)
    if node is None:
        return None

    node_public_date = None
    privacy_actions = NodeLog.find(
        Q('node', 'eq', node_id)
        & Q('action', 'in', [NodeLog.MADE_PUBLIC, NodeLog.MADE_PRIVATE])
    ).sort('-date')

    try:
        privacy_action = privacy_actions[0]
    except IndexError:
        # No privacy change was ever logged; leave the public date unset.
        pass
    else:
        if privacy_action.action == NodeLog.MADE_PUBLIC:
            made_public = privacy_action.date
            # Millisecond precision with a literal 'Z' suffix
            # (assumes log dates are naive UTC -- confirm).
            node_public_date = '{}.{:03d}Z'.format(
                made_public.strftime('%Y-%m-%dT%H:%M:%S'),
                made_public.microsecond // 1000,
            )

    cursor.execute(
        u'INSERT INTO nodes (id, title, category, made_public_date) VALUES (?, ?, ?, ?)',
        (node_id, node.title, node.category, node_public_date)
    )
    sqlite_db.commit()
    # Re-read through the cache path so the caller gets the row as stored.
    return get_or_create_node(node_id, sqlite_db)
Ejemplo n.º 18
0
def main(dry):
    """Attach preprint information to existing preprint NodeLogs.

    ``PREPRINT_INITIATED`` logs dated before the start of the run receive
    ``params['preprint'] = {'id': ...}`` and
    ``params['service'] = {'name': ...}``; ``PREPRINT_FILE_UPDATED`` logs
    receive only ``params['preprint']``.  The preprint is the
    ``PreprintService`` found for the log's node; logs whose node has no
    preprint are skipped.

    :param dry: when true, params are updated in memory and the change is
        logged, but ``log.save()`` is not called.
    """
    if dry:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.
        logging.warning('DRY mode running')
    now = datetime.utcnow()
    initiated_logs = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_INITIATED) & Q('date', 'lt', now))
    for log in initiated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({
                'preprint': {
                    'id': preprint._id
                },
                'service': {
                    'name': preprint.provider.name
                }
            })
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Deliberate best effort: a log whose node has no preprint is
            # left unchanged.
            pass

    updated_logs = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_FILE_UPDATED) & Q('date', 'lt', now))
    for log in updated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({
                'preprint': {
                    'id': preprint._id
                }
            })
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Deliberate best effort: a log whose node has no preprint is
            # left unchanged.
            pass
Ejemplo n.º 19
0
def get_targets():
    """
    Fetches all registration-related logs except for project_registered.

    project_registered log is not included because params already correct.
    """
    actions = (
        'registration_initiated',
        'registration_approved',
        # On staging, there are a few inconsistencies with these.  Majority of params['node'] are registrations, but a handful are nodes.
        'registration_cancelled',
        'retraction_initiated',
        # params['node'] is already equal to node.  Adds registration_field below.  Will be slow.
        'retraction_approved',
        'retraction_cancelled',
        'embargo_initiated',
        'embargo_approved',
        'embargo_completed',
        'embargo_cancelled',
    )
    # Chain the clauses with OR, left to right.
    query = Q('action', 'eq', actions[0])
    for action in actions[1:]:
        query = query | Q('action', 'eq', action)
    return NodeLog.find(query)
Ejemplo n.º 20
0
def get_targets():
    """
    Fetches all registration-related logs except for project_registered.

    project_registered log is not included because params already correct.
    """
    action_names = [
        'registration_initiated',
        'registration_approved',
        # On staging, there are a few inconsistencies with these.  Majority of params['node'] are registrations, but a handful are nodes.
        'registration_cancelled',
        'retraction_initiated',
        # params['node'] is already equal to node.  Adds registration_field below.  Will be slow.
        'retraction_approved',
        'retraction_cancelled',
        'embargo_initiated',
        'embargo_approved',
        'embargo_completed',
        'embargo_cancelled',
    ]
    selector = None
    for name in action_names:
        term = Q('action', 'eq', name)
        if selector is None:
            selector = term
        else:
            # Left-to-right OR of one clause per action.
            selector = selector | term
    targets = NodeLog.find(selector)
    return targets
Ejemplo n.º 21
0
def get_targets():
    """Return ``EMBARGO_APPROVED`` NodeLogs whose ``params.user`` is ``None``."""
    is_embargo_approved = Q('action', 'eq', NodeLog.EMBARGO_APPROVED)
    has_no_user = Q('params.user', 'eq', None)
    return NodeLog.find(is_embargo_approved & has_no_user)
Ejemplo n.º 22
0
def get_targets():
    """Return the NodeLogs whose ``should_hide`` field equals ``True``."""
    hidden = Q('should_hide', 'eq', True)
    return NodeLog.find(hidden)
Ejemplo n.º 23
0
def get_targets():
    """Return the ``'retraction_approved'`` logs whose registrations are to be migrated.

    The number of logs found is reported at info level before returning.
    """
    retraction_logs = NodeLog.find(Q('action', 'eq', 'retraction_approved'))
    found = len(retraction_logs)
    logger.info('Retractions found: {}'.format(found))
    return retraction_logs
Ejemplo n.º 24
0
def main():
    """Copy MODM NodeLogs into the Django ``NodeLog`` table, page by page.

    Logs are read newest-first (``sort('-date')``) in slices of ``page_size``
    and converted to unsaved Django ``NodeLog`` instances.  User and node
    primary keys are resolved through the ``modm_to_django`` lookup table; a
    missing user or node is counted and stored as ``None``.  Each time
    ``count`` reaches a multiple of ``page_size`` the accumulated instances
    are written with ``bulk_create`` inside ``transaction.atomic()``.
    Progress, timings and the blank user/node counters are printed.

    NOTE(review): the only ``bulk_create`` call sits under
    ``count % page_size == 0`` and the batch deque is recreated at the top of
    every ``while`` pass, so a trailing partial page looks like it is never
    written -- confirm.
    NOTE(review): a duplicate guid hits ``continue`` before ``count += 1``,
    which would leave ``count`` short of the page boundary for that page --
    confirm whether duplicates can occur.
    """
    start = datetime.now()
    split = start
    total = MODMNodeLog.find().count()

    count = 0
    page_size = 10000
    blank_users = 0
    blank_nodes = 0

    while count < total:
        garbage = gc.collect()
        print 'Collected {} whole garbages!'.format(garbage)
        print 'Migrating {} through {}'.format(count, count + page_size)

        # Fresh per-page batch and per-page record of guids already seen.
        django_nodelogs = deque()
        nodelog_guids = deque()

        for modm_nodelog in MODMNodeLog.find().sort('-date')[count:count +
                                                             page_size]:
            # Skip a log whose guid was already queued in this page.
            if modm_nodelog._id in nodelog_guids:
                print 'Nodelog with guid of {} and data of {} exists in batch'.format(
                    modm_nodelog._id, modm_nodelog.to_storage())
                continue
            else:
                nodelog_guids.append(modm_nodelog._id)

            # AttributeError covers a log whose user is None; KeyError covers
            # a user id absent from the lookup table.
            try:
                user_pk = modm_to_django[modm_nodelog.user._id]
            except (KeyError, AttributeError) as ex:
                blank_users += 1
                user_pk = None

            # Same treatment for the node reference.
            try:
                node_pk = modm_to_django[getattr(modm_nodelog, 'node',
                                                 None)._id]
            except (KeyError, AttributeError) as ex:
                blank_nodes += 1
                print 'Found blank node on {}'.format(modm_nodelog._id)
                node_pk = None

            # Attach the UTC timezone to the stored date when there is one.
            if modm_nodelog.date is None:
                nodelog_date = None
            else:
                nodelog_date = pytz.utc.localize(modm_nodelog.date)
            django_nodelogs.append(
                NodeLog(guid=modm_nodelog._id,
                        date=nodelog_date,
                        action=modm_nodelog.action,
                        params=modm_nodelog.params,
                        should_hide=modm_nodelog.should_hide,
                        user_id=user_pk,
                        foreign_user=modm_nodelog.foreign_user or '',
                        node_id=node_pk))

            count += 1
            # Timing report every 1000 logs.
            if count % 1000 == 0:
                print 'Through {} in {}'.format(count, (datetime.now() -
                                                        split).total_seconds())
                split = datetime.now()
            # Page boundary reached: write the accumulated batch.
            if count % page_size == 0:
                print '{} blank users; {} blank nodes'.format(
                    blank_users, blank_nodes)
                print 'Starting to migrate {} through {} which is {}'.format(
                    count - page_size, count, len(django_nodelogs))
                splat = datetime.now()

                if len(django_nodelogs) > 0:
                    with transaction.atomic():
                        NodeLog.objects.bulk_create(django_nodelogs)

                print 'Finished migrating {} through {} in {} which is {}'.format(
                    count - page_size, count,
                    (datetime.now() - splat).total_seconds(),
                    len(django_nodelogs))

                django_nodelogs = deque()
                nodelog_guids = deque()

                garbage = gc.collect()
                print 'Collected {} whole garbages!'.format(garbage)

    # Terminal bell characters to signal completion.
    print '\a\a\a\a\a'
    print 'Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total,
        NodeLog.objects.count())
    print 'There were {} blank users and {} blank nodes'.format(
        blank_users, blank_nodes)
Ejemplo n.º 25
0
def get_targets():
    """Find every NodeLog that has ``should_hide`` set to ``True``."""
    only_hidden = Q('should_hide', 'eq', True)
    hidden_logs = NodeLog.find(only_hidden)
    return hidden_logs
Ejemplo n.º 26
0
def get_targets():
    """Find the ``'retraction_approved'`` logs whose registrations are to be migrated.

    Logs the number of results at info level, then returns them.
    """
    approved_query = Q('action', 'eq', 'retraction_approved')
    approved = NodeLog.find(approved_query)

    summary = 'Retractions found: {}'.format(len(approved))
    logger.info(summary)
    return approved
Ejemplo n.º 27
0
def logs_since(user, date):
    """Return the NodeLogs of ``user`` whose ``date`` is greater than ``date``.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :param date: lower bound (exclusive) for the log date.
    """
    by_user = Q('user', 'eq', user._id)
    after_date = Q('date', 'gt', date)
    return NodeLog.find(by_user & after_date)
Ejemplo n.º 28
0
def main():
    """Migrate MODM NodeLogs (plus their users and nodes) into Django models.

    Logs are read newest-first in slices of ``page_size``; each slice is
    processed inside ``transaction.atomic()``.  Logs whose guid already
    exists in Django are skipped.  Users and nodes missing from the
    ``modm_to_django`` lookup table are created via ``get_or_create_user`` /
    ``get_or_create_node`` and cached there.  Converted logs are accumulated
    and, when ``count`` reaches a multiple of ``page_size``, written with
    ``bulk_create``; their ``was_connected_to`` relations are added afterwards.

    NOTE(review): there is no flush after the ``while`` loop, so logs
    accumulated in a trailing partial page look like they are never written
    -- confirm.
    NOTE(review): the ``continue`` taken when a node guid cannot be resolved
    skips ``count += 1`` -- confirm this cannot stall paging.
    NOTE(review): ``django_nodelogs_ids`` is never cleared, unlike the other
    two accumulators -- confirm that is intended.
    """
    total = MODMNodeLog.find().count()
    # total = len(modm_nodelogs)
    count = 0
    page_size = 100000
    django_nodelogs = []
    django_nodelogs_ids = []
    django_nodelogs_was_connected_to = {}

    print 'Migrating {} logs...'.format(total)
    while count < total:
        modm_nodelogs = None
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count + page_size]
        with transaction.atomic():
            print 'Migrating {} through {} which is {}'.format(
                count, count + page_size, len(modm_nodelogs))
            for modm_nodelog in modm_nodelogs:

                # don't recreate the log if it exists
                if NodeLog.objects.filter(guid=modm_nodelog._id).exists():
                    pass
                else:
                    if modm_nodelog.user is not None:
                        # try to get the pk out of the lookup table
                        user_pk = modm_to_django.get(modm_nodelog.user._id,
                                                         None)

                        # it wasn't there
                        if user_pk is None:
                            # create a new user
                            print 'Creating User {}'.format(modm_nodelog.user._id)
                            user = get_or_create_user(modm_nodelog.user)
                            user_pk = user.pk
                            # put the user in the lookup table for next time
                            modm_to_django[modm_nodelog.user._id] = user_pk
                    else:
                        # log doesn't have user
                        user_pk = None

                    # get the node (either a MODMNode instance or a node guid)
                    # 'project' is the fallback key when params has no 'node'
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = None
                    if node_id is not None:
                        if isinstance(node_id, basestring):
                            # it's a guid, look it up in the table
                            node_pk = modm_to_django.get(node_id, None)
                        elif isinstance(node_id, MODMNode):
                            # it's an instance, look it up in the table
                            node_pk = modm_to_django.get(node_id._id, None)

                        if node_pk is None:
                            print 'Creating Node {}'.format(node_id)
                            # it wasn't in the table
                            if isinstance(node_id, basestring):
                                # it's a guid, get an instance and create a PG version
                                modm_node = MODMNode.load(node_id)
                                django_node = get_or_create_node(modm_node)
                                if django_node is None:
                                    print 'Node {} does not exist.'.format(
                                        node_id)
                                    continue
                                # NOTE(review): get_or_create_node is called a
                                # second time here although django_node already
                                # holds its result.
                                node_pk = get_or_create_node(modm_node).pk
                                # put it in the table for later
                                modm_to_django[modm_node._id] = node_pk
                            elif isinstance(node_id, MODMNode):
                                # it's an instance, create a PG version
                                node_pk = get_or_create_node(node_id).pk
                                # put it in the table for later
                                modm_to_django[node_id._id] = node_pk
                    if node_pk is not None:
                        # Resolve (creating if needed) the pk of every node in
                        # the log's was_connected_to list.
                        was_connected_to = []
                        for wct in modm_nodelog.was_connected_to:
                            wct_pk = modm_to_django.get(wct._id, None)
                            if wct_pk is None:
                                wct_pk = get_or_create_node(wct).pk
                                modm_to_django[wct._id] = wct_pk
                            was_connected_to.append(wct_pk)
                        # Attach the UTC timezone to the stored date, if any.
                        if modm_nodelog.date is None:
                            nodelog_date = None
                        else:
                            nodelog_date = pytz.utc.localize(modm_nodelog.date)
                        # Queue the log unless its guid was queued before.
                        if modm_nodelog._id not in django_nodelogs_ids:
                            django_nodelogs.append(NodeLog(
                                guid=modm_nodelog._id,
                                date=nodelog_date,
                                action=modm_nodelog.action,
                                params=modm_nodelog.params,
                                should_hide=modm_nodelog.should_hide,
                                user_id=user_pk,
                                foreign_user=modm_nodelog.foreign_user or '',
                                node_id=node_pk))
                            django_nodelogs_was_connected_to[
                                modm_nodelog._id] = was_connected_to
                            django_nodelogs_ids.append(modm_nodelog._id)
                        else:
                            print 'NodeLog with id {} and data {} was already in the bulk_create'.format(
                                modm_nodelog._id, modm_nodelog.to_storage())

                    else:
                        print 'Node {} is None on NodeLog {}...'.format(
                            node_id, modm_nodelog._id)
                count += 1
                # Progress line every page_size / 50 logs.
                if count % (page_size / 50) == 0:
                    print 'Through {}'.format(count)
                # Page boundary reached: write the queued logs.
                if count % page_size == 0:
                    print 'Starting to migrate {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    if len(django_nodelogs) > 0:
                        NodeLog.objects.bulk_create(django_nodelogs)

                        print 'Finished migrating {} through {} which should be {}'.format(
                            count - page_size, count, len(django_nodelogs))
                        print 'Adding m2m values'
                        # Each created log is fetched again by guid before its
                        # was_connected_to relations are added.
                        for django_nodelog in django_nodelogs:
                            nl = NodeLog.objects.get(guid=django_nodelog.guid)
                            nl.was_connected_to.add(
                                *django_nodelogs_was_connected_to[
                                    django_nodelog.guid])
                        print 'Finished adding m2m values'

                    django_nodelogs = []
                    django_nodelogs_was_connected_to = {}
                    garbage = gc.collect()
                    print 'Collected {} garbages!'.format(garbage)

    # Terminal bell characters to signal completion.
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print 'Finished migration. MODM: {}, DJANGO: {}'.format(
        total, NodeLog.objects.all().count())
Ejemplo n.º 29
0
def count_user_logs(user, query=None):
    """Return the number of NodeLogs for ``user`` under an optional query.

    :param user: object whose ``_id`` is matched against the log ``user`` field.
    :param query: optional extra query, AND-ed with the user filter when truthy.
    """
    user_filter = Q('user', 'eq', user._id)
    if not query:
        query = user_filter
    else:
        query &= user_filter
    matches = NodeLog.find(query)
    return matches.count()
Ejemplo n.º 30
0
def main():
    """Migrate MODM NodeLogs (plus their users and nodes) into Django models.

    Logs are read newest-first in slices of ``page_size``; each slice is
    processed inside ``transaction.atomic()``.  Logs whose guid already
    exists in Django are skipped.  Users and nodes missing from the
    ``modm_to_django`` lookup table are created via ``get_or_create_user`` /
    ``get_or_create_node`` and cached there.  Converted logs are accumulated
    and, when ``count`` reaches a multiple of ``page_size``, written with
    ``bulk_create``; their ``was_connected_to`` relations are added afterwards.

    NOTE(review): there is no flush after the ``while`` loop, so logs
    accumulated in a trailing partial page look like they are never written
    -- confirm.
    NOTE(review): the ``continue`` taken when a node guid cannot be resolved
    skips ``count += 1`` -- confirm this cannot stall paging.
    NOTE(review): ``django_nodelogs_ids`` is never cleared, unlike the other
    two accumulators -- confirm that is intended.
    """
    total = MODMNodeLog.find().count()
    # total = len(modm_nodelogs)
    count = 0
    page_size = 100000
    django_nodelogs = []
    django_nodelogs_ids = []
    django_nodelogs_was_connected_to = {}

    print 'Migrating {} logs...'.format(total)
    while count < total:
        modm_nodelogs = None
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count +
                                                         page_size]
        with transaction.atomic():
            print 'Migrating {} through {} which is {}'.format(
                count, count + page_size, len(modm_nodelogs))
            for modm_nodelog in modm_nodelogs:

                # don't recreate the log if it exists
                if NodeLog.objects.filter(guid=modm_nodelog._id).exists():
                    pass
                else:
                    if modm_nodelog.user is not None:
                        # try to get the pk out of the lookup table
                        user_pk = modm_to_django.get(modm_nodelog.user._id,
                                                     None)

                        # it wasn't there
                        if user_pk is None:
                            # create a new user
                            print 'Creating User {}'.format(
                                modm_nodelog.user._id)
                            user = get_or_create_user(modm_nodelog.user)
                            user_pk = user.pk
                            # put the user in the lookup table for next time
                            modm_to_django[modm_nodelog.user._id] = user_pk
                    else:
                        # log doesn't have user
                        user_pk = None

                    # get the node (either a MODMNode instance or a node guid)
                    # 'project' is the fallback key when params has no 'node'
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = None
                    if node_id is not None:
                        if isinstance(node_id, basestring):
                            # it's a guid, look it up in the table
                            node_pk = modm_to_django.get(node_id, None)
                        elif isinstance(node_id, MODMNode):
                            # it's an instance, look it up in the table
                            node_pk = modm_to_django.get(node_id._id, None)

                        if node_pk is None:
                            print 'Creating Node {}'.format(node_id)
                            # it wasn't in the table
                            if isinstance(node_id, basestring):
                                # it's a guid, get an instance and create a PG version
                                modm_node = MODMNode.load(node_id)
                                django_node = get_or_create_node(modm_node)
                                if django_node is None:
                                    print 'Node {} does not exist.'.format(
                                        node_id)
                                    continue
                                # NOTE(review): get_or_create_node is called a
                                # second time here although django_node already
                                # holds its result.
                                node_pk = get_or_create_node(modm_node).pk
                                # put it in the table for later
                                modm_to_django[modm_node._id] = node_pk
                            elif isinstance(node_id, MODMNode):
                                # it's an instance, create a PG version
                                node_pk = get_or_create_node(node_id).pk
                                # put it in the table for later
                                modm_to_django[node_id._id] = node_pk
                    if node_pk is not None:
                        # Resolve (creating if needed) the pk of every node in
                        # the log's was_connected_to list.
                        was_connected_to = []
                        for wct in modm_nodelog.was_connected_to:
                            wct_pk = modm_to_django.get(wct._id, None)
                            if wct_pk is None:
                                wct_pk = get_or_create_node(wct).pk
                                modm_to_django[wct._id] = wct_pk
                            was_connected_to.append(wct_pk)
                        # Attach the UTC timezone to the stored date, if any.
                        if modm_nodelog.date is None:
                            nodelog_date = None
                        else:
                            nodelog_date = pytz.utc.localize(modm_nodelog.date)
                        # Queue the log unless its guid was queued before.
                        if modm_nodelog._id not in django_nodelogs_ids:
                            django_nodelogs.append(
                                NodeLog(guid=modm_nodelog._id,
                                        date=nodelog_date,
                                        action=modm_nodelog.action,
                                        params=modm_nodelog.params,
                                        should_hide=modm_nodelog.should_hide,
                                        user_id=user_pk,
                                        foreign_user=modm_nodelog.foreign_user
                                        or '',
                                        node_id=node_pk))
                            django_nodelogs_was_connected_to[
                                modm_nodelog._id] = was_connected_to
                            django_nodelogs_ids.append(modm_nodelog._id)
                        else:
                            print 'NodeLog with id {} and data {} was already in the bulk_create'.format(
                                modm_nodelog._id, modm_nodelog.to_storage())

                    else:
                        print 'Node {} is None on NodeLog {}...'.format(
                            node_id, modm_nodelog._id)
                count += 1
                # Progress line every page_size / 50 logs.
                if count % (page_size / 50) == 0:
                    print 'Through {}'.format(count)
                # Page boundary reached: write the queued logs.
                if count % page_size == 0:
                    print 'Starting to migrate {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    if len(django_nodelogs) > 0:
                        NodeLog.objects.bulk_create(django_nodelogs)

                        print 'Finished migrating {} through {} which should be {}'.format(
                            count - page_size, count, len(django_nodelogs))
                        print 'Adding m2m values'
                        # Each created log is fetched again by guid before its
                        # was_connected_to relations are added.
                        for django_nodelog in django_nodelogs:
                            nl = NodeLog.objects.get(guid=django_nodelog.guid)
                            nl.was_connected_to.add(
                                *django_nodelogs_was_connected_to[
                                    django_nodelog.guid])
                        print 'Finished adding m2m values'

                    django_nodelogs = []
                    django_nodelogs_was_connected_to = {}
                    garbage = gc.collect()
                    print 'Collected {} garbages!'.format(garbage)

    # Terminal bell characters to signal completion.
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print 'Finished migration. MODM: {}, DJANGO: {}'.format(
        total,
        NodeLog.objects.all().count())
Ejemplo n.º 31
0
def get_targets():
    """Return the NodeLogs whose action equals ``NodeLog.WIKI_DELETED``."""
    wiki_deleted = Q('action', 'eq', NodeLog.WIKI_DELETED)
    return NodeLog.find(wiki_deleted)
Ejemplo n.º 32
0
def get_aggregate_logs(ids, user, count=100):
    """Return a list of NodeLogs whose ``params.node`` is one of ``ids``.

    The result is sorted on ``date`` and capped at ``int(count)`` entries.
    ``user`` is accepted but not used by this function.
    """
    ordered = NodeLog.find(Q('params.node', 'in', ids)).sort('date')
    return list(ordered.limit(int(count)))
Ejemplo n.º 33
0
def count_user_logs(user, query=None):
    """Count the NodeLogs whose ``user`` field equals ``user._id``.

    When a truthy ``query`` is supplied, it is AND-ed with the user
    condition; otherwise the user condition is the whole query.
    """
    by_user = Q('user', 'eq', user._id)
    if not query:
        return NodeLog.find(by_user).count()
    query &= by_user
    return NodeLog.find(query).count()
Ejemplo n.º 34
0
def get_registration_approved_logs():
    """Return 'registration_approved' NodeLogs whose ``params.registration`` is None."""
    # Only logs lacking a params['registration'] value are selected.
    is_approval = Q('action', 'eq', 'registration_approved')
    lacks_registration = Q('params.registration', 'eq', None)
    return NodeLog.find(is_approval & lacks_registration)
Ejemplo n.º 35
0
def main():
    """Copy every MODM NodeLog into the Django ``NodeLog`` table, page by page.

    Logs are read sorted by ``-date`` in pages of ``page_size`` rows, converted
    to unsaved Django ``NodeLog`` instances and written with one
    ``bulk_create`` per page inside ``transaction.atomic()``. User and node
    references are translated through ``modm_to_django``; references that
    cannot be resolved are stored as ``None`` and tallied. Progress, timing
    and the final MODM/Django counts are printed to stdout.

    Every page is flushed as soon as it has been read, so the last, partially
    filled page is written as well.
    """
    start = datetime.now()
    split = start
    total = MODMNodeLog.find().count()

    count = 0    # logs converted so far (drives the progress output)
    offset = 0   # MODM rows consumed so far, i.e. where the next page starts
    page_size = 10000
    blank_users = 0
    blank_nodes = 0

    while offset < total:
        garbage = gc.collect()
        print('Collected {} whole garbages!'.format(garbage))
        print('Migrating {} through {}'.format(offset, offset + page_size))

        django_nodelogs = []
        # A set keeps the per-row duplicate check O(1).
        nodelog_guids = set()
        fetched = 0

        # NOTE(review): offset paging over a '-date' sort presumably relies on
        # the ordering being stable between queries -- confirm.
        page = MODMNodeLog.find().sort('-date')[offset:offset + page_size]
        for modm_nodelog in page:
            fetched += 1

            if modm_nodelog._id in nodelog_guids:
                print('Nodelog with guid of {} and data of {} exists in batch'.format(
                    modm_nodelog._id, modm_nodelog.to_storage()))
                continue
            nodelog_guids.add(modm_nodelog._id)

            # A missing user (AttributeError on None) or an unmapped id
            # (KeyError) both end up as a NULL user reference.
            try:
                user_pk = modm_to_django[modm_nodelog.user._id]
            except (KeyError, AttributeError):
                blank_users += 1
                user_pk = None

            try:
                node_pk = modm_to_django[getattr(modm_nodelog, 'node',
                                                 None)._id]
            except (KeyError, AttributeError):
                blank_nodes += 1
                print('Found blank node on {}'.format(modm_nodelog._id))
                node_pk = None

            if modm_nodelog.date is None:
                nodelog_date = None
            else:
                # Attach UTC to the stored datetime before handing it to Django.
                nodelog_date = pytz.utc.localize(modm_nodelog.date)

            django_nodelogs.append(
                NodeLog(guid=modm_nodelog._id,
                        date=nodelog_date,
                        action=modm_nodelog.action,
                        params=modm_nodelog.params,
                        should_hide=modm_nodelog.should_hide,
                        user_id=user_pk,
                        foreign_user=modm_nodelog.foreign_user or '',
                        node_id=node_pk))

            count += 1
            if count % 1000 == 0:
                print('Through {} in {}'.format(count, (
                    datetime.now() - split).total_seconds()))
                split = datetime.now()

        if fetched == 0:
            # ``total`` was measured up front; if the source ran dry early,
            # stop instead of requesting the same empty page forever.
            break

        print('{} blank users; {} blank nodes'.format(blank_users,
                                                      blank_nodes))
        print('Starting to migrate {} through {} which is {}'.format(
            offset, offset + fetched, len(django_nodelogs)))
        splat = datetime.now()

        if django_nodelogs:
            with transaction.atomic():
                NodeLog.objects.bulk_create(django_nodelogs)

        print('Finished migrating {} through {} in {} which is {}'.format(
            offset, offset + fetched,
            (datetime.now() - splat).total_seconds(),
            len(django_nodelogs)))

        offset += fetched
        # Drop the page's objects so the collection at the top of the next
        # iteration can reclaim them.
        del django_nodelogs, nodelog_guids, page

    print('\a\a\a\a\a')
    print('Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total,
        NodeLog.objects.count()))
    print('There were {} blank users and {} blank nodes'.format(blank_users,
                                                                blank_nodes))
Ejemplo n.º 36
0
def get_targets():
    """Return the NodeLogs whose action equals ``NodeLog.WIKI_DELETED``."""
    deleted_wiki_query = Q('action', 'eq', NodeLog.WIKI_DELETED)
    return NodeLog.find(deleted_wiki_query)
def get_targets():
    """Return ``NodeLog.EMBARGO_APPROVED`` logs whose ``params.user`` is None."""
    embargo_approved = Q('action', 'eq', NodeLog.EMBARGO_APPROVED)
    missing_user = Q('params.user', 'eq', None)
    return NodeLog.find(embargo_approved & missing_user)