Beispiel #1
0
def remove_duplicates(by='pmid'):
    """Remove duplicate articles by field.

    Tallies how often each value of ``by`` occurs among the stored article
    documents. For every value that occurs more than once, the first article
    returned by ``Article.find`` is kept and the remaining ones are removed.

    :param str by: Article field to identify duplicates

    """
    # Only the ``by`` field is projected from the raw store. Assuming the
    # store behaves like a MongoDB collection, documents that lack the field
    # come back without that key; skip them rather than raise ``KeyError``,
    # since a missing value cannot identify a duplicate.
    counts = collections.Counter(
        record[by]
        for record in Article._storage[0].store.find({}, {by: 1})
        if by in record
    )

    for value, count in counts.items():
        if count == 1:
            continue
        articles = list(Article.find(Q(by, 'eq', value)))
        # Keep the first match; everything after it is treated as a duplicate.
        for duplicate in articles[1:]:
            # Lazy %-style arguments: the message is only built when the
            # debug level is enabled.
            logger.debug('Deleting duplicate record: %s', value)
            Article.remove_one(duplicate)
Beispiel #2
0
def remove_duplicates(by='pmid'):
    """Remove duplicate articles by field.

    Counts the occurrences of each ``by`` value in the stored article
    documents, then removes every article after the first one returned by
    ``Article.find`` for each value that occurs more than once.

    :param str by: Article field to identify duplicates
    """
    # The raw store query projects only the ``by`` field. Presumably the
    # store is a MongoDB collection, which omits absent fields from projected
    # documents; such documents are skipped instead of raising ``KeyError``.
    cursor = Article._storage[0].store.find({}, {by: 1})
    counts = collections.Counter(doc[by] for doc in cursor if by in doc)

    for value, count in counts.items():
        if count == 1:
            continue
        articles = list(Article.find(Q(by, 'eq', value)))
        # The first article is retained; the rest are the duplicates.
        for duplicate in articles[1:]:
            # Defer message formatting to the logging framework.
            logger.debug('Deleting duplicate record: %s', value)
            Article.remove_one(duplicate)