def main():
    """Rebuild the job tables from the 'jobs' worksheet and run the spiders.

    The service account credentials are taken from the GOOGLE_SERVICE_ACCOUNT
    environment variable; when it is unset or empty, they are read from
    google_service_account.json located next to this file.
    """
    google_service_account_path = Path(__file__).parent / 'google_service_account.json'
    google_service_account_json = os.getenv('GOOGLE_SERVICE_ACCOUNT') or google_service_account_path.read_text()
    google_service_account = json.loads(google_service_account_json)
    google_scope = ['https://spreadsheets.google.com/feeds',
                    'https://www.googleapis.com/auth/drive']
    credentials = ServiceAccountCredentials.from_json_keyfile_dict(google_service_account, google_scope)

    doc_key = '1TO5Yzk0-4V_RzRK5Jr9I_pF5knZsEZrNn2HKTXrHgls'
    doc = gspread.authorize(credentials).open_by_key(doc_key)
    records = doc.worksheet('jobs').get_all_records(default_blank=None)

    with db:
        # Start from empty tables on every run
        for model in [Job, JobError, JobDropped]:
            model.drop_table()
            model.create_table()

        for record in records:
            Job.create(**coerce_record(record))

    spider_names = [
        'linkedin',
        'stackoverflow',
        'startupjobs',
    ]
    # map() blocks until every spider has finished; the context manager then
    # shuts the worker processes down instead of leaving the pool unmanaged.
    with Pool() as pool:
        pool.map(run_spider, spider_names)
def main():
    """Recreate the metric tables and fill them from the fetched metrics.

    Global metrics are stored as GlobalMetric rows. Per-job metrics are
    stored as JobMetric rows; URLs which do not resolve to a job
    (Job.DoesNotExist) are skipped.
    """
    ga_metrics = fetch_from_google_analytics()
    mc_metrics = fetch_from_mailchimp()

    # (metric name, dict the value is read from under that same name)
    global_metrics = (
        ('avg_monthly_users', ga_metrics),
        ('avg_monthly_pageviews', ga_metrics),
        ('subscribers', mc_metrics),
    )
    # (JobMetric name, Google Analytics key); jobs are looked up by URL
    per_job_metrics = (
        ('users', 'users_per_job'),
        ('pageviews', 'pageviews_per_job'),
        ('applications', 'applications_per_job'),
    )
    # (JobMetric name, Google Analytics key, Mailchimp key); jobs are
    # looked up by link
    external_metrics = (
        ('users', 'users_per_external_job', 'users_per_external_url'),
        ('pageviews', 'pageviews_per_external_job', 'pageviews_per_external_url'),
    )

    with db:
        GlobalMetric.drop_table()
        GlobalMetric.create_table()
        for metric_name, source in global_metrics:
            GlobalMetric.create(name=metric_name, value=source[metric_name])

        JobMetric.drop_table()
        JobMetric.create_table()

        for metric_name, ga_key in per_job_metrics:
            for url, value in ga_metrics[ga_key].items():
                try:
                    JobMetric.create(job=Job.get_by_url(url),
                                     name=metric_name,
                                     value=value)
                except Job.DoesNotExist:
                    pass

        for metric_name, ga_key, mc_key in external_metrics:
            merged = merge_metric_dicts(ga_metrics[ga_key], mc_metrics[mc_key])
            for url, value in merged.items():
                try:
                    JobMetric.create(job=Job.get_by_link(url),
                                     name=metric_name,
                                     value=value)
                except Job.DoesNotExist:
                    pass
Beispiel #3
0
def index():
    """Render index.html with the job count, company count and stories."""
    with db:
        counts = dict(jobs_count=Job.count(),
                      companies_count=Job.companies_count())
    return render_template('index.html', **counts, stories=Story.listing())
Beispiel #4
0
def candidate():
    """Render candidate.html with the job count, company count and thumbnail."""
    with db:
        counts = dict(jobs_count=Job.count(),
                      companies_count=Job.companies_count())
    page_thumbnail = thumbnail(title='Příručka hledání první práce v\u00a0IT')
    return render_template('candidate.html', **counts, thumbnail=page_thumbnail)
Beispiel #5
0
def jobs():
    """Render jobs.html with the job listing, regions and combined metrics."""
    with db:
        general_metrics = Metric.as_dict()
        job_metrics = Job.aggregate_metrics()
        metrics = dict(**general_metrics, **job_metrics)
        listing = Job.listing()

    context = dict(
        nav_active='jobs',
        subnav_tabs=JOBS_SUBNAV_TABS,
        subnav_active='jobs',
        jobs=listing,
        regions=REGIONS,
        metrics=metrics,
        thumbnail=thumbnail(title='Práce v\u00a0IT pro začátečníky'),
    )
    return render_template('jobs.html', **context)
Beispiel #6
0
def job(job_id):
    """Render job.html for the given job, or abort with 404.

    abort(404) is called when Job.get_by_id() returns a falsy value.
    """
    with db:
        found = Job.get_by_id(job_id)
        posting = found if found else abort(404)
        counts = dict(jobs_count=Job.count(),
                      companies_count=Job.companies_count())

    job_thumbnail = thumbnail(job_title=posting.title,
                              job_company=posting.company_name,
                              job_location=posting.location)
    return render_template('job.html',
                           job=posting,
                           **counts,
                           thumbnail=job_thumbnail)
def db():
    """Yield a file-backed SqliteDatabase with the Job table created.

    Job is bound to the database before its table is created. After the
    generator resumes, the database file is removed if it still exists.
    """
    # A real file is needed here: the tests open and close the connection,
    # and a :memory: sqlite database disappears once its connection closes.
    with NamedTemporaryFile(delete=False) as tmp_file:
        tmp_name = tmp_file.name
    db_path = Path(tmp_name)

    database = SqliteDatabase(tmp_name)
    with database:
        Job.bind(database)
        Job.create_table()

    yield database

    if db_path.exists():
        db_path.unlink()
Beispiel #8
0
def job(job_id):
    """Render job.html for the job returned by Job.juniorguru_get_by_id()."""
    with db:
        general_metrics = Metric.as_dict()
        job_metrics = Job.aggregate_metrics()
        metrics = dict(**general_metrics, **job_metrics)
        posting = Job.juniorguru_get_by_id(job_id)

    context = dict(
        nav_active='jobs',
        subnav_tabs=JOBS_SUBNAV_TABS,
        subnav_active='jobs',
        job=posting,
        metrics=metrics,
        thumbnail=thumbnail(job_title=posting.title,
                            job_company=posting.company_name,
                            job_location=posting.location),
    )
    return render_template('job.html', **context)
Beispiel #9
0
def jobs():
    """Render jobs.html with the job listing, bot listing and counts."""
    with db:
        context = dict(
            jobs=Job.listing(),
            jobs_count=Job.count(),
            companies_count=Job.companies_count(),
            jobs_bot=Job.bot_listing(),
        )
    page_thumbnail = thumbnail(title='Práce pro začínající programátory')
    return render_template('jobs.html', **context, thumbnail=page_thumbnail)
Beispiel #10
0
def jobs():
    """Render jobs.html with the job listing, bot listing and counts."""
    with db:
        listing = Job.listing()
        total_jobs = Job.count()
        total_companies = Job.companies_count()
        bot_listing = Job.bot_listing()

    return render_template(
        'jobs.html',
        jobs=listing,
        jobs_count=total_jobs,
        companies_count=total_companies,
        jobs_bot=bot_listing,
        thumbnail=thumbnail(title='Práce v\u00a0IT pro začátečníky'),
    )
Beispiel #11
0
def jobs_region(region_id):
    """Render jobs_region.html with the jobs listed for the given region.

    Raises IndexError when no entry in REGIONS has the given id.
    """
    matching_regions = [reg for reg in REGIONS if reg['id'] == region_id]
    region = matching_regions[0]

    with db:
        general_metrics = Metric.as_dict()
        job_metrics = Job.aggregate_metrics()
        metrics = dict(**general_metrics, **job_metrics)
        listing = Job.region_listing(region['name'])

    context = dict(
        nav_active='jobs',
        subnav_tabs=JOBS_SUBNAV_TABS,
        subnav_active='jobs',
        jobs=listing,
        region=region,
        regions=REGIONS,
        metrics=metrics,
        thumbnail=thumbnail(title=f"Práce v\u00a0IT pro začátečníky —\u00a0{region['name']}"),
    )
    return render_template('jobs_region.html', **context)
Beispiel #12
0
def index():
    """Render index.html with aggregated job metrics and the stories."""
    with db:
        job_metrics = Job.aggregate_metrics()

    context = dict(nav_tabs=None,
                   metrics=job_metrics,
                   stories=Story.listing())
    return render_template('index.html', **context)
def test_listing_returns_only_not_expired_jobs(db_connection):
    """Job.listing() keeps jobs with no expiration or a future one.

    Jobs expiring today or in the past are expected to be left out.
    """
    never_expires = create_job('1', expires_at=None)
    create_job('2', expires_at=date(1987, 8, 30))
    create_job('3', expires_at=date.today())
    expires_later = create_job('4', expires_at=date.today() + timedelta(days=2))

    listed = set(Job.listing())

    assert listed == {never_expires, expires_later}
Beispiel #14
0
async def manage_jobs_channel(client):
    """Sync listed jobs with the messages in the Discord jobs channel.

    For every channel message which contains a job's link, the job's vote
    counts are set from the message reactions and saved. Jobs whose link
    was not found in any message are posted to the channel, but only if
    DISCORD_MUTATIONS_ENABLED is truthy.
    """
    channel = await client.fetch_channel(JOBS_CHANNEL)

    jobs = list(Job.listing())
    seen_links = set()

    async for message in channel.history(limit=None, after=None):
        for job in jobs:
            # Trailing slash is ignored when looking for the link
            if job.link.rstrip('/') not in message.content:
                continue

            log.info(f'Job {job.link} exists')
            seen_links.add(job.link)

            if not message.reactions:
                continue

            job.upvotes_count = count_upvotes(message.reactions)
            job.downvotes_count = count_downvotes(message.reactions)
            with db:
                job.save()
            log.info(f'Saved {job.link} reactions')

    if not DISCORD_MUTATIONS_ENABLED:
        log.warning(
            "Skipping Discord mutations, DISCORD_MUTATIONS_ENABLED not set")
        return

    new_jobs = [job for job in jobs if job.link not in seen_links]
    log.info(f'Posting {len(new_jobs)} new jobs')
    for job in new_jobs:
        await channel.send(
            f'**{job.title}**\n{job.company_name} – {job.location}\n{job.link}'
        )
Beispiel #15
0
def test_newsletter_listing_backfills_up_to_min_count(db_connection):
    """With one juniorguru job, newsletter_listing(3) is filled up to three.

    The juniorguru job is expected first, followed by the other jobs with
    the highest sort_rank; the lowest-ranked job is left out.
    """
    rank_5 = create_job('1', source='moo', sort_rank=5)
    create_job('2', source='foo', sort_rank=1)
    rank_10 = create_job('3', source='bar', sort_rank=10)
    juniorguru = create_job('4', source='juniorguru')

    listed = list(Job.newsletter_listing(3))

    assert listed == [juniorguru, rank_10, rank_5]
Beispiel #16
0
def test_database_id_prefilled(db, pipeline, item, spider):
    """An id already present on the item becomes the saved job's id."""
    expected_id = 'honza42'
    item['id'] = expected_id

    pipeline.process_item(item, spider)
    with db:
        saved_job = Job.select()[0]

    assert saved_job.id == expected_id
Beispiel #17
0
def test_database_company_logo_path(db, pipeline, item, spider):
    """The saved company_logo_path is the first logo's path under images/."""
    first_logo = {
        'checksum': '6b874bd7b996e9323fd2e094be83ca4c',
        'path': 'company-logos/d40730d4068db31a09687ebb42f7637e26864a30.png',
        'status': 'uptodate',
        'url': 'https://www.startupjobs.cz/uploads/d6e95f8c946b72f36783aa0a0238341b.png',
    }
    second_logo = {
        'checksum': 'f3e2f82d7d8b24367f0a2c24b3d1aea3',
        'path': 'company-logos/d1eed8447fb59dc9587dd97148a109a3cca77ed8.png',
        'status': 'uptodate',
        'url': 'https://www.startupjobs.cz/uploads/GQ1A8RDZWYUJfavicon155377551420.png',
    }
    item['company_logos'] = [first_logo, second_logo]

    pipeline.process_item(item, spider)
    with db:
        saved_job = Job.select()[0]

    assert saved_job.company_logo_path == 'images/company-logos/d40730d4068db31a09687ebb42f7637e26864a30.png'
def test_count(db_connection):
    """Job.count() is expected to be 2 for this set of jobs.

    Two jobs are approved with no expiration, one is not approved, and
    one is approved but has an expiration date in 1987.
    """
    approved_on = date(1987, 8, 30)

    create_job('1', approved_at=approved_on)
    create_job('2', approved_at=None)
    create_job('3', approved_at=approved_on)
    create_job('4', approved_at=approved_on, expires_at=date(1987, 9, 1))

    counted = Job.count()

    assert counted == 2
Beispiel #19
0
def generate_messages(today):
    """Return a generator of messages, one per job in the juniorguru listing.

    Each message is created by create_message() from the job, the
    templates/job_metrics.html template next to this file, and today.
    """
    listing = Job.juniorguru_listing()

    templates_dir = Path(__file__).parent / 'templates'
    template_source = (templates_dir / 'job_metrics.html').read_text()
    template = Template(template_source)

    return (create_message(job, template, today) for job in listing)
Beispiel #20
0
def test_juniorguru_listing(db_connection):
    """juniorguru_listing() returns only juniorguru jobs, highest sort_rank first."""
    rank_30 = create_job('1', source='juniorguru', sort_rank=30)
    create_job('2', source='moo')
    rank_20 = create_job('3', source='juniorguru', sort_rank=20)
    rank_10 = create_job('4', source='juniorguru', sort_rank=10)

    listed = list(Job.juniorguru_listing())

    assert listed == [rank_30, rank_20, rank_10]
Beispiel #21
0
def test_aggregate_metrics_companies_count(db_connection):
    """companies_count is expected to be 2 for this set of jobs.

    Three juniorguru jobs share two distinct company links; a fourth job
    with another company link comes from a different source.
    """
    jobs_spec = [
        ('1', 'https://example.com/1', 'juniorguru'),
        ('2', 'https://example.com/2', 'juniorguru'),
        ('3', 'https://example.com/2', 'juniorguru'),
        ('4', 'https://example.com/3', 'xyz'),
    ]
    for job_id, company_link, source in jobs_spec:
        create_job(job_id, company_link=company_link, source=source)

    metrics = Job.aggregate_metrics()

    assert metrics['companies_count'] == 2
Beispiel #22
0
def test_remote_listing(db_connection):
    """remote_listing() returns only remote jobs, highest sort_rank first."""
    rank_30 = create_job('1', remote=True, sort_rank=30)
    create_job('2', remote=False)
    rank_20 = create_job('3', remote=True, sort_rank=20)
    rank_10 = create_job('4', remote=True, sort_rank=10)

    listed = list(Job.remote_listing())

    assert listed == [rank_30, rank_20, rank_10]
Beispiel #23
0
def test_database(db, pipeline, item, spider):
    """Processing an item saves a job with a 56-character id and source 'dummy'."""
    pipeline.process_item(item, spider)

    with db:
        saved_job = Job.select()[0]

    # sha224 hex digest length
    assert len(saved_job.id) == 56
    # spider name
    assert saved_job.source == 'dummy'
def test_newsletter_listing_returns_only_jg_if_enough(db_connection):
    """With three juniorguru jobs, newsletter_listing(3) returns just those."""
    first = create_job('1', source='juniorguru')
    create_job('2', source='moo')
    second = create_job('3', source='juniorguru')
    third = create_job('4', source='juniorguru')

    listed = list(Job.newsletter_listing(3))

    assert listed == [first, second, third]
Beispiel #25
0
async def manage_jobs_voting_channel(client):  # experimenting with Mila and ML
    """Sync jobs and dropped jobs with messages in the voting channel.

    The channel history is read twice: once for jobs with magic_is_junior
    equal to False, once for dropped jobs with magic_is_junior equal to
    True. When a message contains a link, the reaction counts are added to
    the stored vote counts and saved. Links not found in any message are
    posted to the channel, but only if DISCORD_MUTATIONS_ENABLED is truthy.
    """
    channel = await client.fetch_channel(JOBS_VOTING_CHANNEL)
    seen_links = set()

    log.info('Processing voting for jobs')
    # TODO PoC, move this to models or revamp models altogether?
    jobs_query = Job.select().where(Job.magic_is_junior == False)
    jobs = list(jobs_query)
    async for message in channel.history(limit=None, after=None):
        for job in jobs:
            link = job.link
            # Trailing slash is ignored when looking for the link
            if link.rstrip('/') not in message.content:
                continue

            log.info(f'Job {link} exists')
            seen_links.add(link)

            if not message.reactions:
                continue

            job.upvotes_count += count_upvotes(message.reactions)
            job.downvotes_count += count_downvotes(message.reactions)
            with db:
                job.save()
            log.info(f'Saved {link} reactions')

    log.info('Processing voting for dropped jobs')
    # TODO PoC, move this to models or revamp models altogether?
    jobs_dropped_query = JobDropped.select().where(
        JobDropped.magic_is_junior == True)
    jobs_dropped = list(jobs_dropped_query)
    async for message in channel.history(limit=None, after=None):
        for job_dropped in jobs_dropped:
            link = job_dropped.item['link']
            if link.rstrip('/') not in message.content:
                continue

            log.info(f'Job {link} exists')
            seen_links.add(link)

            if not message.reactions:
                continue

            job_dropped.upvotes_count += count_upvotes(message.reactions)
            job_dropped.downvotes_count += count_downvotes(message.reactions)
            with db:
                job_dropped.save()
            log.info(f'Saved {link} reactions')

    if not DISCORD_MUTATIONS_ENABLED:
        log.warning(
            "Skipping Discord mutations, DISCORD_MUTATIONS_ENABLED not set")
        return

    new_jobs = [job for job in jobs if job.link not in seen_links]
    log.info(f'Posting {len(new_jobs)} new jobs')
    for job in new_jobs:
        await channel.send(
            f'**{job.title}**\n{job.company_name} – {job.location}\n{job.link}'
        )

    new_jobs_dropped = [
        job_dropped for job_dropped in jobs_dropped
        if job_dropped.item['link'] not in seen_links
    ]
    log.info(f'Posting {len(new_jobs_dropped)} new dropped jobs')
    for job_dropped in new_jobs_dropped:
        await channel.send(
            f"**{job_dropped.item['title']}**\n{job_dropped.item['company_name']} – {', '.join(job_dropped.item['locations_raw'])}\n{job_dropped.item['link']}"
        )
Beispiel #26
0
def test_newsletter_listing_returns_only_juniorguru_if_enough(db_connection):
    """With four juniorguru jobs, newsletter_listing(3) returns all four.

    They are expected highest sort_rank first; the job from another
    source is left out.
    """
    rank_30 = create_job('1', source='juniorguru', sort_rank=30)
    create_job('2', source='moo')
    rank_20 = create_job('3', source='juniorguru', sort_rank=20)
    rank_10 = create_job('4', source='juniorguru', sort_rank=10)
    rank_5 = create_job('5', source='juniorguru', sort_rank=5)

    listed = list(Job.newsletter_listing(3))

    assert listed == [rank_30, rank_20, rank_10, rank_5]
def test_companies_count_takes_only_approved_jobs(db):
    """companies_count() is expected to be 2 for this set of jobs.

    Two company links have an approved job each; the third company link
    only has a job which is not approved.
    """
    jobs_spec = [
        ('1', 'https://abc.example.com', True),
        ('2', 'https://abc.example.com', False),
        ('3', 'https://xyz.example.com', True),
        ('4', 'https://xyz.example.com', False),
        ('5', 'https://def.example.com', False),
    ]
    for job_id, company_link, is_approved in jobs_spec:
        create_job(job_id, company_link=company_link, is_approved=is_approved)

    counted = Job.companies_count()

    assert counted == 2
Beispiel #28
0
def main():
    """Rebuild the job tables from the 'jobs' sheet and run the spiders.

    The Job, JobError and JobDropped tables are dropped and created again,
    then one Job is created for each downloaded record. The spiders run
    afterwards in a pool of worker processes.
    """
    doc_key = '1TO5Yzk0-4V_RzRK5Jr9I_pF5knZsEZrNn2HKTXrHgls'
    records = download_sheet(doc_key, 'jobs')

    with db:
        # Start from empty tables on every run
        for model in [Job, JobError, JobDropped]:
            model.drop_table()
            model.create_table()

        for record in records:
            Job.create(**coerce_record(record))

    spider_names = [
        'linkedin',
        'stackoverflow',
        'startupjobs',
    ]
    # map() blocks until every spider has finished; the context manager then
    # shuts the worker processes down instead of leaving the pool unmanaged.
    with Pool() as pool:
        pool.map(run_spider, spider_names)
def test_database(item, spider, db):
    """Processing an item saves an unapproved job with id and source set."""
    pipeline = Pipeline(db=db, model=Job)
    pipeline.process_item(item, spider)

    with db:
        saved_job = Job.select()[0]

    # sha224 hex digest length
    assert len(saved_job.id) == 56
    # spider name
    assert saved_job.source == 'dummy'
    assert saved_job.is_approved is False
Beispiel #30
0
 def operation():
     """Store the item and the response data on its job and save the job."""
     # The id present on the item wins; otherwise it is created from the item
     job_id = item.get('id') or create_id(item)
     job = Job.get_by_id(job_id)

     job.item = item
     for attr, value in response_data.items():
         setattr(job, attr, value)
     job.save()

     log.debug(f"Updated job '{job.id}' with monitoring data")
     self.stats.inc_value('monitoring/job_saved')