def test_migrate_v2():
    '''Check that the source-partition migration copies an old-model record.

    A ``DocumentModelOld`` record is created from ``RAW.attributes``. No
    ``DocumentModel`` may match RAW's docID/source before
    ``tasks.migrate_to_source_partition(dry=False)`` runs, and exactly one
    must match afterwards.
    '''
    def matching_count():
        # Issue a fresh lookup on every call so each check reflects the
        # state at the moment it is made.
        return len(DocumentModel.objects(docID=RAW['docID'], source=RAW['source']))

    DocumentModelOld.create(**RAW.attributes).save()
    assert matching_count() == 0

    tasks.migrate_to_source_partition(dry=False)
    assert matching_count() == 1
Exemple #2
0
def test_migrate_v2():
    '''Exercise ``tasks.migrate_to_source_partition`` with ``dry=False``.

    Starts from a single ``DocumentModelOld`` record built from
    ``RAW.attributes`` and asserts that the number of ``DocumentModel``
    matches for RAW's docID/source goes from 0 to 1 across the migration.
    '''
    # Arrange: only the old-style model holds the document.
    DocumentModelOld.create(**RAW.attributes).save()
    found_before = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
    assert len(found_before) == 0

    # Act: run the migration for real (not a dry run).
    tasks.migrate_to_source_partition(dry=False)

    # Assert: the document is now reachable through the new model.
    found_after = DocumentModel.objects(docID=RAW['docID'], source=RAW['source'])
    assert len(found_after) == 1
Exemple #3
0
def test_migrate_v2():
    '''Check the source-partition migration after normalising ``RAW['doc']``.

    ``RAW['doc']`` is first replaced by its UTF-8 encoded form, or by its
    ``str()`` form when it has no ``encode`` attribute. A ``DocumentModelOld``
    record is then created from ``RAW.attributes``, and the count of matching
    ``DocumentModel`` entries must go from 0 to 1 once
    ``tasks.migrate_to_source_partition(dry=False)`` has run.
    '''
    try:
        doc_payload = RAW['doc'].encode('utf-8')
    except AttributeError:
        # The payload has no ``encode`` attribute: fall back to ``str()``.
        doc_payload = str(RAW['doc'])
    RAW['doc'] = doc_payload

    DocumentModelOld.create(**RAW.attributes).save()

    def count_migrated():
        # Look the document up again on each call.
        return len(DocumentModel.objects(docID=RAW['docID'], source=RAW['source']))

    assert count_migrated() == 0
    tasks.migrate_to_source_partition(dry=False)
    assert count_migrated() == 1
Exemple #4
0
    kwargs['async'] = async
    kwargs['group_size'] = group_size
    kwargs['sources'] = map(lambda x: x.strip(), sources.split(','))

    if kwargs['sources'] == ['']:
        kwargs.pop('sources')

    migrate_func = migrations.__dict__[migration]

    migrate(migrate_func, **kwargs)


@task
def migrate_to_source_partition(dry=True, async=False):
    from scrapi.tasks import migrate_to_source_partition
    migrate_to_source_partition(dry=dry, async=async)


@task
def reset_search():
    '''Shut down the local elasticsearch node, then start it again.

    Posts to the ``_shutdown`` endpoint on localhost:9200. Afterwards it
    restarts the service on Ubuntu, or launches ``elasticsearch`` directly on
    Mac OSX. On any other platform nothing is started.
    '''
    run("curl -XPOST 'http://localhost:9200/_shutdown'")

    # NOTE(review): platform.linux_distribution() was removed in Python 3.8;
    # confirm the interpreter this task file targets.
    distro_name = platform.linux_distribution()[0]
    if distro_name == 'Ubuntu':
        run("sudo service elasticsearch restart")
        return

    if platform.system() == 'Darwin':  # Mac OSX
        run('elasticsearch')


@task
def elasticsearch():
    '''Start a local elasticsearch server
Exemple #5
0
    kwargs['dry'] = dry
    kwargs['async'] = async
    kwargs['sources'] = map(lambda x: x.strip(), sources.split(','))

    if kwargs['sources'] == ['']:
        kwargs.pop('sources')

    migrate_func = migrations.__dict__[migration]

    migrate(migrate_func, **kwargs)


@task
def migrate_to_source_partition(dry=True, async=False):
    from scrapi.tasks import migrate_to_source_partition
    migrate_to_source_partition(dry=dry, async=async)


@task
def reset_search():
    '''Stop the elasticsearch node on localhost:9200 and bring it back up.

    The node is asked to shut down through its ``_shutdown`` endpoint. It is
    then restarted with ``service`` on Ubuntu, or by running ``elasticsearch``
    on Mac OSX. Other platforms only get the shutdown request.
    '''
    run("curl -XPOST 'http://localhost:9200/_shutdown'")

    # NOTE(review): platform.linux_distribution() no longer exists from
    # Python 3.8 onwards; verify the supported interpreter versions.
    on_ubuntu = platform.linux_distribution()[0] == 'Ubuntu'
    if on_ubuntu:
        run("sudo service elasticsearch restart")
        return

    if platform.system() == 'Darwin':  # Mac OSX
        run('elasticsearch')


@task
def elasticsearch():
    '''Start a local elasticsearch server