コード例 #1
0
ファイル: miner.py プロジェクト: tfmorris/shortimer
def email_to_job(msg):
    """Build and save a Job from an email message.

    msg is indexed by header name (e.g. msg['subject'], msg['date']).

    Returns the saved Job, or None when the message is skipped because it
    is not a job email, a Job with the same message-id already exists, the
    body is empty, or the Date header cannot be parsed.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    # exists() also catches the case where several rows already share this
    # message-id; comparing count() to exactly 1 let those through and
    # created yet another duplicate.
    if Job.objects.filter(email_message_id=msg['message-id']).exists():
        return None

    logging.info("parsing job email %s", msg['message-id'])

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # strip the list prefix and any folded-header line breaks from the title
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = msg['message-id']
    j.description = get_html(get_body(msg))

    if not j.description:
        logging.warning("missing body")
        return None

    # parsedate() returns None for a missing or malformed Date header, and
    # time.mktime(None) would raise TypeError, so skip such messages.
    # NOTE(review): parsedate() discards the timezone offset and mktime()
    # treats the tuple as local time; rfc822.parsedate_tz/mktime_tz would
    # honour the offset -- confirm before changing stored dates.
    parsed_date = rfc822.parsedate(msg['date'])
    if parsed_date is None:
        logging.warning("unable to parse date %r", msg['date'])
        return None
    j.post_date = datetime.datetime.fromtimestamp(time.mktime(parsed_date))

    # the job must be saved before subjects can be attached to it
    j.save()

    # automatically assign subjects based on keywords in the job description
    for n in nouns(j.description):
        for subject in Subject.objects.filter(keywords__name=n.lower()):
            j.subjects.add(subject)

    j.save()
    return j
コード例 #2
0
ファイル: miner.py プロジェクト: tfmorris/shortimer
def email_to_job(msg):
    """Build and save a Job from an email message.

    msg is indexed by header name (e.g. msg['subject'], msg['date']).

    Returns the saved Job, or None when the message is skipped because it
    is not a job email, a Job with the same message-id already exists, the
    body is empty, or the Date header cannot be parsed.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    # exists() also catches the case where several rows already share this
    # message-id; comparing count() to exactly 1 let those through and
    # created yet another duplicate.
    if Job.objects.filter(email_message_id=msg['message-id']).exists():
        return None

    logging.info("parsing job email %s", msg['message-id'])

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # strip the list prefix and any folded-header line breaks from the title
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = msg['message-id']
    j.description = get_html(get_body(msg))

    if not j.description:
        logging.warning("missing body")
        return None

    # parsedate() returns None for a missing or malformed Date header, and
    # time.mktime(None) would raise TypeError, so skip such messages.
    # NOTE(review): parsedate() discards the timezone offset and mktime()
    # treats the tuple as local time; rfc822.parsedate_tz/mktime_tz would
    # honour the offset -- confirm before changing stored dates.
    parsed_date = rfc822.parsedate(msg['date'])
    if parsed_date is None:
        logging.warning("unable to parse date %r", msg['date'])
        return None
    j.post_date = datetime.datetime.fromtimestamp(time.mktime(parsed_date))

    # the job must be saved before subjects can be attached to it
    j.save()

    # automatically assign subjects based on keywords in the job description
    for n in nouns(j.description):
        for subject in Subject.objects.filter(keywords__name=n.lower()):
            j.subjects.add(subject)

    j.save()
    return j
コード例 #3
0
def email_to_job(msg):
    """Build, save and auto-tag a Job from an email message.

    msg is indexed by header name (e.g. msg['subject'], msg['date']).

    Returns the saved Job, or None when the message is skipped because it
    is not a job email, a Job with the same message-id already exists, the
    body is empty, the body links to http://jobs.code4lib.org, or the Date
    header cannot be parsed.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    # exists() also catches the case where several rows already share this
    # message-id; comparing count() to exactly 1 let those through and
    # created yet another duplicate.
    if Job.objects.filter(email_message_id=msg['message-id']).exists():
        return None

    logging.info("parsing job email %s", msg['message-id'])

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # strip the list prefix and any folded-header line breaks from the title
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = msg['message-id']
    j.description = get_html(get_body(msg))

    if not j.description:
        logging.warning("missing body")
        return None

    if 'http://jobs.code4lib.org' in j.description:
        logging.warning("not loading a job that shortimer posted")
        return None

    # parsedate() returns None for a missing or malformed Date header, and
    # time.mktime(None) would raise TypeError, so skip such messages.
    # NOTE(review): parsedate() discards the timezone offset and mktime()
    # treats the tuple as local time; rfc822.parsedate_tz/mktime_tz would
    # honour the offset -- confirm before changing stored dates.
    parsed_date = rfc822.parsedate(msg['date'])
    if parsed_date is None:
        logging.warning("unable to parse date %r", msg['date'])
        return None
    j.post_date = datetime.datetime.fromtimestamp(time.mktime(parsed_date))

    # save once before autotag() runs on the job, then again afterwards
    j.save()
    autotag(j)
    j.save()
    return j
コード例 #4
0
ファイル: miner.py プロジェクト: bibliotechy/shortimer
def email_to_job(msg):
    """Build, save and auto-tag a Job from an email message.

    msg is indexed by header name (e.g. msg['subject'], msg['date']).

    Returns the saved Job, or None when the message is skipped because it
    is not a job email, a Job with the same message-id already exists, the
    body is empty, the body links to http://jobs.code4lib.org, or the Date
    header cannot be parsed.
    """
    logging.info("looking at email with subject: %s", msg['subject'])

    if not is_job_email(msg):
        return None

    # exists() also catches the case where several rows already share this
    # message-id; comparing count() to exactly 1 let those through and
    # created yet another duplicate.
    if Job.objects.filter(email_message_id=msg['message-id']).exists():
        return None

    logging.info("parsing job email %s", msg['message-id'])

    j = Job()
    j.contact_name, j.contact_email = rfc822.parseaddr(msg['from'])
    j.contact_name = normalize_name(j.contact_name)
    j.contact_email = j.contact_email.lower()

    # strip the list prefix and any folded-header line breaks from the title
    j.title = re.sub(r"^\[CODE4LIB\] ", "", msg['subject'])
    j.title = re.sub(r"[\n\r]", "", j.title)
    j.email_message_id = msg['message-id']
    j.description = get_html(get_body(msg))

    if not j.description:
        logging.warning("missing body")
        return None

    if 'http://jobs.code4lib.org' in j.description:
        logging.warning("not loading a job that shortimer posted")
        return None

    # parsedate() returns None for a missing or malformed Date header, and
    # time.mktime(None) would raise TypeError, so skip such messages.
    # NOTE(review): parsedate() discards the timezone offset and mktime()
    # treats the tuple as local time; rfc822.parsedate_tz/mktime_tz would
    # honour the offset -- confirm before changing stored dates.
    parsed_date = rfc822.parsedate(msg['date'])
    if parsed_date is None:
        logging.warning("unable to parse date %r", msg['date'])
        return None
    j.post_date = datetime.datetime.fromtimestamp(time.mktime(parsed_date))

    # save once before autotag() runs on the job, then again afterwards
    j.save()
    autotag(j)
    j.save()
    return j