Exemple #1
0
def test_database_id_prefilled(db, pipeline, item, spider):
    """An ``id`` already present on the item is stored unchanged on the Job."""
    expected_id = 'honza42'
    item['id'] = expected_id

    pipeline.process_item(item, spider)

    with db:
        stored = Job.select()[0]

    assert stored.id == expected_id
Exemple #2
0
def test_database_company_logo_path(db, pipeline, item, spider):
    """With two logos on the item, the stored path is built from the first one.

    The expected value is the first logo's ``path`` with an ``images/``
    prefix in front of it.
    """
    first_logo = dict(
        checksum='6b874bd7b996e9323fd2e094be83ca4c',
        path='company-logos/d40730d4068db31a09687ebb42f7637e26864a30.png',
        status='uptodate',
        url='https://www.startupjobs.cz/uploads/d6e95f8c946b72f36783aa0a0238341b.png',
    )
    second_logo = dict(
        checksum='f3e2f82d7d8b24367f0a2c24b3d1aea3',
        path='company-logos/d1eed8447fb59dc9587dd97148a109a3cca77ed8.png',
        status='uptodate',
        url='https://www.startupjobs.cz/uploads/GQ1A8RDZWYUJfavicon155377551420.png',
    )
    item['company_logos'] = [first_logo, second_logo]

    pipeline.process_item(item, spider)

    with db:
        stored = Job.select()[0]

    expected_path = 'images/company-logos/d40730d4068db31a09687ebb42f7637e26864a30.png'
    assert stored.company_logo_path == expected_path
Exemple #3
0
def test_database(db, pipeline, item, spider):
    """Processing an item stores a Job with a generated id and the source."""
    sha224_hex_length = 56  # sha224 hex digest length

    pipeline.process_item(item, spider)

    with db:
        stored = Job.select()[0]

    assert len(stored.id) == sha224_hex_length
    assert stored.source == 'dummy'  # spider name
Exemple #4
0
def _record_votes(messages, records, get_link, seen_links):
    """Add reaction counts from ``messages`` to the records they mention.

    For every message and every record, the record's link (with trailing
    slashes stripped) is looked up in the message content. On a match the
    link is added to ``seen_links`` and, if the message has reactions, the
    record's ``upvotes_count`` / ``downvotes_count`` are incremented and the
    record is saved.

    :param messages: iterable of messages exposing ``content`` and
        ``reactions``
    :param records: records exposing ``upvotes_count``, ``downvotes_count``
        and ``save()``
    :param get_link: callable returning the link of a single record
    :param seen_links: set mutated in place with every link found
    """
    for message in messages:
        for record in records:
            link = get_link(record)
            if link.rstrip('/') not in message.content:
                continue
            log.info(f'Job {link} exists')
            seen_links.add(link)
            if message.reactions:
                record.upvotes_count += count_upvotes(message.reactions)
                record.downvotes_count += count_downvotes(message.reactions)
                with db:
                    record.save()
                log.info(f'Saved {link} reactions')


async def manage_jobs_voting_channel(client):  # experimenting with Mila and ML
    """Sync votes from the jobs voting channel and post jobs not yet in it.

    Reads the whole history of the ``JOBS_VOTING_CHANNEL`` channel, adds the
    reactions found on messages to the matching ``Job`` and ``JobDropped``
    records, and then, only if ``DISCORD_MUTATIONS_ENABLED`` is truthy,
    sends one message per record whose link was not found in the history.

    :param client: client object providing the ``fetch_channel()`` coroutine
    """
    channel = await client.fetch_channel(JOBS_VOTING_CHANNEL)
    seen_links = set()

    # Fetch the history a single time and reuse it for both passes below,
    # instead of asking the API for the complete channel history twice.
    messages = [
        message
        async for message in channel.history(limit=None, after=None)
    ]

    log.info('Processing voting for jobs')
    # TODO PoC, move this to models or revamp models altogether?
    jobs = list(Job.select().where(Job.magic_is_junior == False))
    _record_votes(messages, jobs, lambda job: job.link, seen_links)

    log.info('Processing voting for dropped jobs')
    # TODO PoC, move this to models or revamp models altogether?
    jobs_dropped = list(
        JobDropped.select().where(JobDropped.magic_is_junior == True))
    _record_votes(messages, jobs_dropped,
                  lambda job_dropped: job_dropped.item['link'], seen_links)

    if not DISCORD_MUTATIONS_ENABLED:
        log.warning(
            "Skipping Discord mutations, DISCORD_MUTATIONS_ENABLED not set")
        return

    new_jobs = [job for job in jobs if job.link not in seen_links]
    log.info(f'Posting {len(new_jobs)} new jobs')
    for job in new_jobs:
        await channel.send(
            f'**{job.title}**\n{job.company_name} – {job.location}\n{job.link}'
        )

    new_jobs_dropped = [
        job_dropped for job_dropped in jobs_dropped
        if job_dropped.item['link'] not in seen_links
    ]
    log.info(f'Posting {len(new_jobs_dropped)} new dropped jobs')
    for job_dropped in new_jobs_dropped:
        await channel.send(
            f"**{job_dropped.item['title']}**\n{job_dropped.item['company_name']} – {', '.join(job_dropped.item['locations_raw'])}\n{job_dropped.item['link']}"
        )
def test_database(item, spider, db):
    """A processed item is stored as a not-yet-approved Job with id and source."""
    pipeline = Pipeline(db=db, model=Job)
    pipeline.process_item(item, spider)

    with db:
        stored = Job.select()[0]

    sha224_hex_length = 56  # sha224 hex digest length
    assert len(stored.id) == sha224_hex_length
    assert stored.source == 'dummy'  # spider name
    assert stored.is_approved is False
def test_database_same_link_items(item, spider, db):
    """The same item processed with three locations yields three distinct Jobs."""
    locations = ('Ostrava', 'Brno', 'Pardubice')
    for city in locations:
        item['location'] = city
        Pipeline(db=db, model=Job).process_item(item, spider)

    with db:
        stored_jobs = Job.select()

    # Every stored row must differ in both its id and its location.
    assert len(stored_jobs) == 3
    assert len({stored.id for stored in stored_jobs}) == 3
    assert len({stored.location for stored in stored_jobs}) == 3
Exemple #7
0
def test_database_id(db, pipeline, item, spider):
    """The stored Job gets an id that is 56 characters long."""
    pipeline.process_item(item, spider)

    with db:
        stored = Job.select()[0]

    id_length = len(stored.id)
    assert id_length == 56  # sha224 hex digest length
Exemple #8
0
def read_data_job():
    """Select jobs with a positive ``junior_rank`` and convert them to a frame.

    Only the ``id`` and ``item`` columns are selected.

    :return: tuple of the job query and the result of ``map_jobs_to_df``
        applied to it
    """
    columns = (Job.id, Job.item)
    jobs = Job.select(*columns).where(Job.junior_rank > 0)
    return jobs, map_jobs_to_df(jobs)