Beispiel #1
0
class Story(BaseModel):
    """A story identified by its URL, with a date, title, image and tags."""

    url = CharField()
    date = DateField(index=True)
    title = CharField()
    image_path = CharField()
    tags = JSONField(default=lambda: [])

    @property
    def publisher(self):
        """Return the lower-cased host of ``url`` without a leading ``www.``."""
        host = urlparse(self.url).netloc
        return re.sub(r'^www\.', '', host).lower()

    @classmethod
    def listing(cls):
        """Select all stories, newest ``date`` first."""
        newest_first = cls.date.desc()
        return cls.select().order_by(newest_first)

    @classmethod
    def tag_listing(cls, tag):
        """Select stories, newest first, having ``tag`` among their tags.

        The ``tags`` JSON column is expanded through ``children()`` and the
        query keeps rows whose expanded value equals ``tag``.
        """
        tag_rows = cls.tags.children().alias('tags')
        query = cls.listing().from_(cls, tag_rows)
        return query.where(tag_rows.c.value == tag)

    @classmethod
    def tags_mapping(cls):
        """Return a dict mapping each tag to the list of stories carrying it.

        Stories appear in ``listing()`` order within every list.
        """
        stories_by_tag = {}
        for story in cls.listing():
            for tag in story.tags:
                stories_by_tag.setdefault(tag, []).append(story)
        return stories_by_tag
Beispiel #2
0
class JobDropped(BaseModel):
    """Record of a dropped job item: its type, reason, response and raw item."""

    type = CharField()
    reason = CharField()
    response_url = CharField()
    response_backup_path = CharField(null=True)
    item = JSONField()

    @classmethod
    def admin_listing(cls):
        """Select all records ordered by ``type``, then by ``reason``."""
        ordering = (cls.type, cls.reason)
        return cls.select().order_by(*ordering)
Beispiel #3
0
class JobError(BaseModel):
    """Record of an error: message, trace, originating signal and response."""

    message = CharField()
    trace = CharField()
    # allowed values are 'item' and 'spider'
    signal = CharField(choices=(('item', None), ('spider', None)))
    source = CharField()
    response_url = CharField()
    response_backup_path = CharField(null=True)
    item = JSONField(null=True)

    @classmethod
    def admin_listing(cls):
        """Select all error records ordered by ``message``."""
        query = cls.select()
        return query.order_by(cls.message)
Beispiel #4
0
class MessageAuthor(BaseModel):
    """Author of messages, with helpers for counting messages and upvotes.

    NOTE(review): ``list_messages`` is not defined in this class; presumably
    it is a back-reference created by a foreign key on ``Message`` — confirm.
    """

    id = IntegerField(primary_key=True)
    is_bot = BooleanField(default=False)
    is_member = BooleanField(default=True)
    has_avatar = BooleanField(default=False)
    display_name = CharField()
    mention = CharField()
    joined_at = DateTimeField(null=True)
    roles = JSONField(default=lambda: [])

    def messages_count(self):
        """Return the number of all messages by this author."""
        return self.list_messages.count()

    def recent_messages_count(self, today=None):
        """Return the number of messages within the recent period.

        See ``list_recent_messages()`` for how the period is determined.
        """
        return self.list_recent_messages(today).count()

    def upvotes_count(self):
        """Sum upvotes over all messages outside UPVOTES_EXCLUDE_CHANNELS."""
        messages = self.list_messages \
            .where(Message.channel_id.not_in(UPVOTES_EXCLUDE_CHANNELS))
        return sum(message.upvotes for message in messages)

    def recent_upvotes_count(self, today=None):
        """Sum upvotes over recent messages outside UPVOTES_EXCLUDE_CHANNELS."""
        messages = self.list_recent_messages(today) \
            .where(Message.channel_id.not_in(UPVOTES_EXCLUDE_CHANNELS))
        return sum(message.upvotes for message in messages)

    def has_intro(self):
        """Return True if the author has a 'default' message in INTRO_CHANNEL."""
        intro_message = self.list_messages \
            .where(Message.channel_id == INTRO_CHANNEL, Message.type == 'default') \
            .first()
        return bool(intro_message)

    def first_seen_at(self):
        """Return the date the author was first seen, or None if unknown.

        This is the date of the earliest message; without any message it
        falls back to the date part of ``joined_at``.
        """
        first_message = self.list_messages \
            .order_by(Message.created_at) \
            .first()
        if first_message:
            return first_message.created_at.date()

        joined_at = self.joined_at
        if joined_at is None:
            return None
        # joined_at is a DateTimeField; reduce it to a plain date so both
        # branches return the same type and is_new() can compare it with
        # a date. A value that is already a plain date has no .date().
        return joined_at.date() if hasattr(joined_at, 'date') else joined_at

    def list_recent_messages(self, today=None):
        """Select messages created within RECENT_PERIOD_DAYS before ``today``.

        ``today`` defaults to ``date.today()``.
        """
        recent_period_start_at = (today or date.today()) - timedelta(days=RECENT_PERIOD_DAYS)
        return self.list_messages.where(Message.created_at >= recent_period_start_at)

    def is_new(self, today=None):
        """Return True if first seen at most IS_NEW_PERIOD_DAYS before ``today``.

        ``today`` defaults to ``date.today()``. Raises TypeError when
        ``first_seen_at()`` is None (no messages and no ``joined_at``).
        """
        return (self.first_seen_at() + timedelta(days=IS_NEW_PERIOD_DAYS)) >= (today or date.today())

    @classmethod
    def top_members_limit(cls):
        """Return the members count times TOP_MEMBERS_PERCENT, rounded up."""
        return math.ceil(cls.members_listing().count() * TOP_MEMBERS_PERCENT)

    @classmethod
    def members_listing(cls):
        """Select authors who are members and are not bots."""
        # '== False' / '== True' build query expressions, hence no 'not'/'is'
        return cls.select().where(cls.is_bot == False, cls.is_member == True)
Beispiel #5
0
class Event(BaseModel):
    """An event with a start time, descriptions and optional media paths.

    NOTE(review): ``list_speaking`` is not defined in this class; presumably
    it is a back-reference from ``EventSpeaking`` — confirm.
    """

    title = CharField()
    start_at = DateTimeField(index=True)
    description = TextField()
    poster_description = TextField(null=True)
    bio = TextField(null=True)
    bio_links = JSONField(default=lambda: [])
    recording_url = CharField(null=True)
    poster_path = CharField(null=True)
    logo_path = CharField(null=True)

    @property
    def start_at_prg(self):
        """Return ``start_at`` converted to Europe/Prague as a naive datetime."""
        prague_time = arrow.get(self.start_at).to('Europe/Prague')
        return prague_time.naive

    @property
    def url(self):
        """Return the events page URL with a fragment built from the start time.

        The fragment is the ISO format of ``start_at_prg`` with colons
        replaced by dashes.
        """
        fragment = self.start_at_prg.isoformat().replace(':', '-')
        return f"https://junior.guru/events/#{fragment}"

    @property
    def first_avatar_path(self):
        """Return the first truthy ``avatar_path`` among speakings, or None."""
        avatar_paths = [speaking.avatar_path for speaking in self.list_speaking]
        return next(filter(None, avatar_paths), None)

    @classmethod
    def next(cls, today=None):
        """Return the earliest event starting on or after ``today``, if any.

        ``today`` defaults to ``date.today()``.
        """
        if not today:
            today = date.today()
        upcoming = cls.select().where(cls.start_at >= today)
        return upcoming.order_by(cls.start_at).first()

    @classmethod
    def list_speaking_members(cls):
        """Select club users who are members, joined with ``EventSpeaking``."""
        members = ClubUser.select().where(ClubUser.is_member == True)
        return members.join(EventSpeaking)

    @classmethod
    def archive_listing(cls, today=None):
        """Select events that started before ``today``, latest first.

        ``today`` defaults to ``date.today()``.
        """
        if not today:
            today = date.today()
        past = cls.select().where(cls.start_at < today)
        return past.order_by(cls.start_at.desc())
Beispiel #6
0
class JobDropped(BaseModel):
    """Record of a dropped job item, with its source, reason and votes."""

    type = CharField()
    reason = CharField()
    source = CharField()
    response_url = CharField()
    response_backup_path = CharField(null=True)
    item = JSONField()
    magic_is_junior = BooleanField(null=True)
    upvotes = IntegerField(default=0)
    downvotes = IntegerField(default=0)

    @classmethod
    def admin_listing(cls, types=None):
        """Return dropped jobs as a sorted list, optionally limited to ``types``.

        Sorting happens in Python, by: ``type``; items whose title contains
        'junior' (case-insensitive) first; higher ``junior_rank`` first
        (items without a rank count as -1000); then ``reason``.
        """
        jobs = cls.select()
        if types:
            jobs = jobs.where(cls.type.in_(types))
        return sorted(jobs,
                      key=lambda job: (
                          job.type,
                          'junior' not in job.item.get('title', '').lower(),
                          -1 * job.item.get('junior_rank', -1000),
                          job.reason,
                      ))

    @classmethod
    def rejected_count(cls):
        """Return the number of records whose source is not 'juniorguru'."""
        return cls.select() \
            .where(cls.source != 'juniorguru') \
            .count()

    @classmethod
    def sources(cls):
        """Return the set of distinct ``source`` values across all records."""
        # go through cls, like the sibling classmethods, instead of naming
        # the class directly
        return {job_dropped.source for job_dropped in cls.select()}

    @classmethod
    def expired_company_links(cls):
        """Return the set of ``company_link`` values of 'Expired' records.

        The set contains None when some item lacks the 'company_link' key.
        """
        expired = cls.select().where(cls.type == 'Expired')
        return {job_dropped.item.get('company_link') for job_dropped in expired}
Beispiel #7
0
class Job(BaseModel):
    """Job posting record; declares fields only, no behavior.

    The fields fall into three groups: common ones, ones only set for "JG"
    postings and ones only set for scraped postings.

    NOTE(review): "JG" presumably stands for junior.guru's own postings —
    confirm against the code that fills ``source``.
    """
    # common fields
    id = CharField(primary_key=True)
    posted_at = DateTimeField(index=True)
    title = CharField()
    location = CharField()
    company_name = CharField()
    company_link = CharField(null=True)  # required for JG
    employment_types = EmploymentTypeField()
    link = CharField(null=True, index=True)  # required for scraped
    source = CharField()

    # only set by JG
    email = CharField(null=True)  # required for JG
    description = TextField(null=True)  # required for JG
    approved_at = DateField(null=True)
    expires_at = DateField(null=True)
    newsletter_at = DateField(null=True)

    # only set by scraped
    lang = CharField(null=True)  # required for scraped
    jg_rank = IntegerField(null=True)  # required for scraped
    response_url = CharField(null=True)  # required for scraped
    response_backup_path = CharField(null=True)
    item = JSONField(null=True)  # required for scraped
Beispiel #8
0
class Job(BaseModel):
    """Job posting with listing queries, display helpers and tags.

    Postings with ``source == 'juniorguru'`` are treated separately from
    postings coming from any other source.

    NOTE(review): ``list_metrics`` and ``list_newsletter_mentions`` are not
    defined in this class; presumably they are back-references from other
    models — confirm.
    """

    id = CharField(primary_key=True)
    source = CharField(index=True)
    posted_at = DateField(index=True)
    title = CharField()
    remote = BooleanField(default=False)
    locations = JSONField(default=lambda: [])
    company_name = CharField()
    company_link = CharField(null=True)
    company_logo_path = CharField(null=True)
    employment_types = JSONField(default=lambda: [])
    link = CharField(index=True)
    lang = CharField()
    description_html = TextField()
    junior_rank = IntegerField(index=True)
    magic_is_junior = BooleanField(null=True)
    sort_rank = IntegerField(index=True)
    pricing_plan = CharField(default='community', choices=[
        ('community', None),
        ('standard', None),
        ('annual_flat_rate', None),
    ])
    upvotes_count = IntegerField(default=0)
    downvotes_count = IntegerField(default=0)

    # source: juniorguru
    external_link = CharField(null=True)
    email = CharField(null=True)
    expires_at = DateField(null=True)

    # diagnostics
    item = JSONField(null=True)
    response_url = CharField(null=True)
    response_backup_path = CharField(null=True)

    @property
    def is_juniorguru(self):
        """True when the posting's source is 'juniorguru'."""
        return self.source == 'juniorguru'

    @property
    def is_highlighted(self):
        """True for any pricing plan other than 'community'."""
        return self.pricing_plan != 'community'

    @property
    def location(self):
        """Return a short human-readable location string.

        With exactly one location, the result is its name and region (the
        region is left out when equal to the name). Otherwise it is up to
        two alphabetically first location names, with a marker that more
        exist. A remote posting gets 'na dálku' appended; '?' is returned
        when nothing is known.
        """
        # TODO refactor, the branching below is hard to follow
        if len(self.locations) == 1:
            location = self.locations[0]
            name, region = location['name'], location['region']
            parts = [name] if name == region else [name, region]
            if self.remote:
                parts.append('na dálku')
            parts = list(filter(None, parts))  # drop missing name/region
            return ', '.join(parts) if parts else '?'

        # zero or several locations: only names are used, sorted
        parts = sorted(filter(None, [loc['name'] for loc in self.locations]))
        if len(parts) > 2:
            parts = parts[:2]
            if self.remote:
                parts[-1] += ' a další'
                parts.append('na dálku')
                return ', '.join(parts)
            return ', '.join(parts) + '…'
        if parts:
            return ', '.join(parts + (['na dálku'] if self.remote else []))
        return 'na dálku' if self.remote else '?'

    @property
    def metrics(self):
        """Return a dict of metric values, 0 for each name in JOB_METRIC_NAMES
        unless a stored metric provides a value."""
        result = {name: 0 for name in JOB_METRIC_NAMES}
        for metric in self.list_metrics:
            result[metric.name] = metric.value
        return result

    @property
    def newsletter_mentions(self):
        """Select this job's newsletter mentions, most recently sent first."""
        return self.list_newsletter_mentions \
            .order_by(JobNewsletterMention.sent_at.desc())

    @classmethod
    def get_by_url(cls, url):
        """Return the job whose id is embedded in a junior.guru job URL.

        Raises ValueError (carrying the URL) when ``url`` does not start
        with ``http(s)://junior.guru/jobs/<id>/``. Anything ``get_by_id``
        raises propagates unchanged.
        """
        # the dot is escaped so that it matches only a literal '.'
        match = re.match(r'https?://junior\.guru/jobs/([^/]+)/', url)
        if match:
            return cls.get_by_id(match.group(1))
        raise ValueError(url)

    @classmethod
    def get_by_link(cls, link):
        """Return the job whose ``link`` equals the given one."""
        return cls.get(cls.link == link)

    @classmethod
    def juniorguru_get_by_id(cls, id):
        """Return the 'juniorguru' job with the given id."""
        return cls.juniorguru_listing().where(cls.id == id).get()

    @classmethod
    def listing(cls):
        """Select all jobs, highest ``sort_rank`` first."""
        return cls.select().order_by(cls.sort_rank.desc())

    @classmethod
    def aggregate_metrics(cls):
        """Return a dict of overall counts.

        Keys: ``companies_count`` (distinct company links of expired dropped
        jobs and of 'juniorguru' jobs), ``jobs_count`` (all jobs),
        ``approved_jobs_count`` (jobs not from 'juniorguru') and
        ``rejected_jobs_count`` (taken from ``JobDropped.rejected_count()``).
        """
        approved_jobs_count = cls.listing() \
            .where(cls.source != 'juniorguru') \
            .count()
        companies_count = len(JobDropped.expired_company_links() |
                              {job.company_link for job in cls.juniorguru_listing()})
        return dict(companies_count=companies_count,
                    jobs_count=cls.listing().count(),
                    approved_jobs_count=approved_jobs_count,
                    rejected_jobs_count=JobDropped.rejected_count())

    @classmethod
    def juniorguru_listing(cls):
        """Select jobs whose source is 'juniorguru', in ``listing()`` order."""
        return cls.listing().where(cls.source == 'juniorguru')

    @classmethod
    def region_listing(cls, region):
        """Select jobs having a location whose 'region' equals ``region``.

        The ``locations`` JSON column is expanded through ``tree()`` and
        filtered on the key 'region' and the given value.
        """
        locations = cls.locations.tree().alias('locations')
        return cls.listing() \
            .from_(cls, locations) \
            .where((locations.c.key == 'region') &
                   (locations.c.value == region))

    @classmethod
    def remote_listing(cls):
        """Select jobs flagged as remote, in ``listing()`` order."""
        return cls.listing().where(cls.remote == True)

    @classmethod
    def tags_listing(cls, tags):
        """Return a list of jobs having at least one of the given tags.

        Filtering happens in Python because tags are computed by ``tags()``.
        """
        tags = set(tags)
        return [job for job in cls.listing() if tags & set(job.tags())]

    @classmethod
    def internship_listing(cls):
        """Return a list of jobs tagged with any of the internship tags."""
        return cls.tags_listing([
            'INTERNSHIP',
            'UNPAID_INTERNSHIP',
            'ALSO_INTERNSHIP',
        ])

    @classmethod
    def volunteering_listing(cls):
        """Return a list of jobs tagged 'VOLUNTEERING'."""
        return cls.tags_listing(['VOLUNTEERING'])

    @classmethod
    def newsletter_listing(cls, min_count, today=None):
        """Yield jobs for the newsletter.

        All 'juniorguru' jobs are yielded first. If there are fewer than
        ``min_count`` of them, jobs from other sources follow until
        ``min_count`` is reached or they run out.

        ``today`` is currently unused; it is kept so existing callers
        passing it keep working.
        """
        count = 0
        for item in cls.juniorguru_listing():
            yield item
            count += 1

        backfill_query = cls.listing().where(cls.source != 'juniorguru')
        yield from itertools.islice(backfill_query, max(min_count - count, 0))

    def days_since_posted(self, today=None):
        """Return the number of days between ``posted_at`` and ``today``.

        ``today`` defaults to ``date.today()``.
        """
        today = today or date.today()
        return (today - self.posted_at).days

    def days_until_expires(self, today=None):
        """Return the number of days between ``today`` and ``expires_at``.

        ``today`` defaults to ``date.today()``. ``expires_at`` is nullable;
        a TypeError is raised when it is None.
        """
        today = today or date.today()
        return (self.expires_at - today).days

    def expires_soon(self, today=None):
        """Return True when the job expires in 10 days or less."""
        today = today or date.today()
        return self.days_until_expires(today=today) <= 10

    def tags(self, today=None):
        """Return the list of tags computed for this job.

        'NEW' is added when the job was posted fewer than JOB_IS_NEW_DAYS
        days ago, 'REMOTE' when it is remote, followed by the tags derived
        from employment types by ``get_employment_types_tags()``.
        """
        tags = []

        today = today or date.today()
        if self.days_since_posted(today=today) < JOB_IS_NEW_DAYS:
            tags.append('NEW')

        if self.remote:
            tags.append('REMOTE')

        employment_types = frozenset(self.employment_types)
        tags.extend(get_employment_types_tags(employment_types))

        return tags
Beispiel #9
0
class Job(BaseModel):
    """Job posting with approval, sent and expiration flags and listings."""

    id = CharField(primary_key=True)
    posted_at = DateTimeField(index=True)
    title = CharField()
    location = CharField()
    company_name = CharField()
    company_link = CharField(null=True)  # required for JG
    email = CharField(null=True)  # required for JG, null for scraped
    employment_types = EmploymentTypeField()
    description = CharField(null=True)  # required for JG, null for scraped
    lang = CharField(null=True)  # required for scraped, null for JG
    link = CharField(null=True)  # required for scraped
    jg_rank = IntegerField(null=True)  # required for scraped
    source = CharField()
    is_approved = BooleanField(default=False)
    is_sent = BooleanField(default=False)
    is_expired = BooleanField(default=False)
    response_url = CharField(null=True)  # required for scraped, null for JG
    response_backup_path = CharField(null=True)
    item = JSONField(null=True)  # required for scraped, null for JG

    @classmethod
    def listing(cls):
        """Return the default listing, which is ``juniorguru_listing()``."""
        return cls.juniorguru_listing()

    @classmethod
    def newsletter_listing(cls):
        """Select approved, unexpired, not yet sent jobs, oldest first."""
        conditions = (
            cls.is_approved == True,
            cls.is_expired == False,
            cls.is_sent == False,
        )
        return cls.select().where(*conditions).order_by(cls.posted_at)

    @classmethod
    def juniorguru_listing(cls):
        """Select approved, unexpired 'juniorguru' jobs, newest first."""
        conditions = (
            cls.source == 'juniorguru',
            cls.is_approved == True,
            cls.is_expired == False,
        )
        return cls.select().where(*conditions).order_by(cls.posted_at.desc())

    @classmethod
    def bot_listing(cls):
        """Select non-'juniorguru' jobs with a positive ``jg_rank``.

        Ordered by rank, highest first, then by ``posted_at``, newest first.
        """
        ranked = cls.select().where(cls.source != 'juniorguru', cls.jg_rank > 0)
        return ranked.order_by(cls.jg_rank.desc(), cls.posted_at.desc())

    @classmethod
    def scraped_listing(cls):
        """Select all non-'juniorguru' jobs.

        Ordered by rank, highest first, then by ``posted_at``, newest first.
        """
        scraped = cls.select().where(cls.source != 'juniorguru')
        return scraped.order_by(cls.jg_rank.desc(), cls.posted_at.desc())

    @classmethod
    def count(cls):
        """Return the number of jobs in ``listing()``."""
        jobs = cls.listing()
        return jobs.count()

    @classmethod
    def companies_count(cls):
        """Return the number of distinct ``company_link`` values in ``listing()``."""
        company_links = {job.company_link for job in cls.listing()}
        return len(company_links)