Exemple #1
0
    def fetch(self, url):
        """Create a Job for every entry of the feed at ``url`` not stored yet.

        The feed is parsed with ``feedparser.parse``. For each entry a ``Job``
        is built from the entry's title, its whitespace-normalized summary and
        its link, then saved, passed to ``autotag`` and logged. An entry is
        skipped when a ``Job`` whose ``origin_url`` equals the entry's link
        already exists.

        :param url: feed location, handed unchanged to ``feedparser.parse``.
        """
        feed = feedparser.parse(url)
        for entry in feed.entries:
            title = entry.title
            # Collapse newlines and runs of spaces/tabs into single spaces.
            description = entry.summary.replace("\n", " ")
            description = re.sub(r"[ \t]+", " ", description)
            # Kept under its own name so the ``url`` argument is not clobbered.
            link = entry.link

            # exists() only asks whether a matching row is present; there is
            # no need to count every match just to compare against zero.
            if Job.objects.filter(origin_url=link).exists():
                continue
            job = Job(title=title, description=description, origin_url=link, url=link)
            job.save()
            autotag(job)
            # Let logging interpolate the argument itself.
            log.info("added job for review: %s", job)
Exemple #2
0
    def fetch(self, url):
        """Create a Job for every entry of the feed at ``url`` not stored yet.

        The feed is parsed with ``feedparser.parse``. The feed URL and every
        entry title are printed to standard output as progress information.
        For each entry a ``Job`` is built from the entry's title, its
        whitespace-normalized summary and its link, then saved, passed to
        ``autotag`` and logged. An entry is skipped when a ``Job`` whose
        ``origin_url`` equals the entry's link already exists.

        :param url: feed location, handed unchanged to ``feedparser.parse``.
        """
        feed = feedparser.parse(url)
        # Parenthesised single-argument prints emit the same text as the
        # Python 2 print statement while also being valid Python 3.
        print("")
        print(url)
        for entry in feed.entries:
            title = entry.title
            print(title)
            # Collapse newlines and runs of spaces/tabs into single spaces.
            description = entry.summary.replace("\n", " ")
            description = re.sub(r"[ \t]+", " ", description)
            # Kept under its own name so the ``url`` argument is not clobbered.
            link = entry.link

            # exists() only asks whether a matching row is present; there is
            # no need to count every match just to compare against zero.
            if Job.objects.filter(origin_url=link).exists():
                continue
            job = Job(title=title, description=description, origin_url=link, url=link)
            job.save()
            autotag(job)
            # Let logging interpolate the argument itself.
            log.info("added job for review: %s", job)
Exemple #3
0
def email_to_job(msg):
    """Turn a mailing-list email into a saved Job, or return None.

    None is returned, and nothing is saved, when:

    * ``is_job_email(msg)`` is false,
    * a Job with the same ``email_message_id`` already exists,
    * no description could be produced from the message body, or
    * the description contains ``http://jobs.code4lib.org``.

    Otherwise the Job is saved, passed to ``autotag``, saved again and
    returned.

    :param msg: message object whose headers are read by name
        (``subject``, ``message-id``, ``from``, ``date``).
    :returns: the saved ``Job`` instance, or ``None``.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    message_id = msg['message-id']

    # Any existing row means this email was already imported. Comparing the
    # count with exactly 1 would let further duplicates through once two or
    # more rows share the same message id.
    if Job.objects.filter(email_message_id=message_id).exists():
        return None

    logging.info("parsing job email %s", message_id)

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # get the employer
    #j.from_domain = j.from_address.split('@')[1]

    # Strip the list prefix and any line breaks from the subject; raw strings
    # keep the backslash escapes for the regex engine.
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = message_id
    j.description = get_html(get_body(msg))

    # NOTE(review): rfc822.parsedate returns None for a missing or unparseable
    # Date header, in which case time.mktime raises TypeError. mktime also
    # reads the tuple as local time -- confirm both are acceptable.
    t = time.mktime(rfc822.parsedate(msg['date']))
    j.post_date = datetime.datetime.fromtimestamp(t)

    if not j.description:
        logging.warning("missing body")
        return None

    if 'http://jobs.code4lib.org' in j.description:
        logging.warning("not loading a job that shortimer posted")
        return None

    # Saved once before autotag(j) and once after it, as in the original
    # flow -- presumably autotag needs a stored row; verify against autotag.
    j.save()
    autotag(j)
    j.save()
    return j
Exemple #4
0
def email_to_job(msg):
    """Turn a mailing-list email into a saved Job, or return None.

    None is returned, and nothing is saved, when:

    * ``is_job_email(msg)`` is false,
    * a Job with the same ``email_message_id`` already exists,
    * no description could be produced from the message body, or
    * the description contains ``http://jobs.code4lib.org``.

    Otherwise the Job is saved, passed to ``autotag``, saved again and
    returned.

    :param msg: message object whose headers are read by name
        (``subject``, ``message-id``, ``from``, ``date``).
    :returns: the saved ``Job`` instance, or ``None``.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    message_id = msg['message-id']

    # Any existing row means this email was already imported. Comparing the
    # count with exactly 1 would let further duplicates through once two or
    # more rows share the same message id.
    if Job.objects.filter(email_message_id=message_id).exists():
        return None

    logging.info("parsing job email %s", message_id)

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # get the employer
    #j.from_domain = j.from_address.split('@')[1]

    # Strip the list prefix and any line breaks from the subject; raw strings
    # keep the backslash escapes for the regex engine.
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = message_id
    j.description = get_html(get_body(msg))

    # NOTE(review): rfc822.parsedate returns None for a missing or unparseable
    # Date header, in which case time.mktime raises TypeError. mktime also
    # reads the tuple as local time -- confirm both are acceptable.
    t = time.mktime(rfc822.parsedate(msg['date']))
    j.post_date = datetime.datetime.fromtimestamp(t)

    if not j.description:
        logging.warning("missing body")
        return None

    if 'http://jobs.code4lib.org' in j.description:
        logging.warning("not loading a job that shortimer posted")
        return None

    # Saved once before autotag(j) and once after it, as in the original
    # flow -- presumably autotag needs a stored row; verify against autotag.
    j.save()
    autotag(j)
    j.save()
    return j
Exemple #5
0
def email_to_job(msg):
    """Turn a mailing-list email into a saved Job, or return None.

    None is returned, and nothing is saved, when:

    * ``is_job_email(msg)`` is false,
    * a Job with the same ``email_message_id`` already exists, or
    * no description could be produced from the message body.

    Otherwise the Job is saved, every ``Subject`` having a keyword equal to a
    lower-cased noun of the description is attached to it, and the Job is
    saved again and returned.

    :param msg: message object whose headers are read by name
        (``subject``, ``message-id``, ``from``, ``date``).
    :returns: the saved ``Job`` instance, or ``None``.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    message_id = msg['message-id']

    # Any existing row means this email was already imported. Comparing the
    # count with exactly 1 would let further duplicates through once two or
    # more rows share the same message id.
    if Job.objects.filter(email_message_id=message_id).exists():
        return None

    logging.info("parsing job email %s", message_id)

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # get the employer
    #j.from_domain = j.from_address.split('@')[1]

    # Strip the list prefix and any line breaks from the subject; raw strings
    # keep the backslash escapes for the regex engine.
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = message_id
    j.description = get_html(get_body(msg))

    # NOTE(review): rfc822.parsedate returns None for a missing or unparseable
    # Date header, in which case time.mktime raises TypeError. mktime also
    # reads the tuple as local time -- confirm both are acceptable.
    t = time.mktime(rfc822.parsedate(msg['date']))
    j.post_date = datetime.datetime.fromtimestamp(t)

    if not j.description:
        logging.warning("missing body")
        return None

    # The job is saved before subjects are attached to it.
    j.save()

    # Automatically assign subjects based on keywords in the job description.
    # Lower-cased nouns are de-duplicated first so a noun that occurs several
    # times triggers a single Subject lookup.
    keywords = set(n.lower() for n in nouns(j.description))
    for keyword in keywords:
        for subject in Subject.objects.filter(keywords__name=keyword):
            j.subjects.add(subject)

    j.save()
    return j
Exemple #6
0
def email_to_job(msg):
    """Turn a mailing-list email into a saved Job, or return None.

    None is returned, and nothing is saved, when:

    * ``is_job_email(msg)`` is false,
    * a Job with the same ``email_message_id`` already exists, or
    * no description could be produced from the message body.

    Otherwise the Job is saved, every ``Subject`` having a keyword equal to a
    lower-cased noun of the description is attached to it, and the Job is
    saved again and returned.

    :param msg: message object whose headers are read by name
        (``subject``, ``message-id``, ``from``, ``date``).
    :returns: the saved ``Job`` instance, or ``None``.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    message_id = msg['message-id']

    # Any existing row means this email was already imported. Comparing the
    # count with exactly 1 would let further duplicates through once two or
    # more rows share the same message id.
    if Job.objects.filter(email_message_id=message_id).exists():
        return None

    logging.info("parsing job email %s", message_id)

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # get the employer
    #j.from_domain = j.from_address.split('@')[1]

    # Strip the list prefix and any line breaks from the subject; raw strings
    # keep the backslash escapes for the regex engine.
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = message_id
    j.description = get_html(get_body(msg))

    # NOTE(review): rfc822.parsedate returns None for a missing or unparseable
    # Date header, in which case time.mktime raises TypeError. mktime also
    # reads the tuple as local time -- confirm both are acceptable.
    t = time.mktime(rfc822.parsedate(msg['date']))
    j.post_date = datetime.datetime.fromtimestamp(t)

    if not j.description:
        logging.warning("missing body")
        return None

    # The job is saved before subjects are attached to it.
    j.save()

    # Automatically assign subjects based on keywords in the job description.
    # Lower-cased nouns are de-duplicated first so a noun that occurs several
    # times triggers a single Subject lookup.
    keywords = set(n.lower() for n in nouns(j.description))
    for keyword in keywords:
        for subject in Subject.objects.filter(keywords__name=keyword):
            j.subjects.add(subject)

    j.save()
    return j