コード例 #1
0
def downloadAvailableEMails():
    """Download e-mails newer than the latest stored job and dump their jobs.

    Reads ``max(pDate)`` from the job detail table to decide where to resume;
    if the table yields no date, 2000-01-01 (UTC offset 0) is used instead.
    Every e-mail returned by ``EMailDownloader.getEmails`` is pickled into a
    timestamped archive file under ``N.MAILARCHIVES``, parsed, and the
    resulting job offers are handed to ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of e-mails downloaded (0 when there was nothing new, in
        which case no archive is written and nothing is dumped).
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(2000,
                                  1,
                                  1,
                                  tzinfo=dateutil.tz.tzoffset(None, 0))

    connection = MySQLdb.connect(host=N.SQL_HOST_NAME,
                                 user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD,
                                 db=N.SQL_DB)
    # The connection is only needed for this one query, so release it (and
    # its cursor) right away, even if the query raises.
    try:
        cursor = connection.cursor()
        try:
            # Table names cannot be bound as query parameters; tableName
            # comes from the project's own constants, not from user input.
            sqlCMD = "SELECT max(pDate) from %s" % (tableName)
            cursor.execute(sqlCMD)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # max() over an empty table yields a single row holding NULL/None.
    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    # Keep a raw copy of the download before parsing it.
    archivePath = N.MAILARCHIVES + "MailArchive%s.pkl" % (
        datetime.datetime.now())
    with open(archivePath, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)

    return len(emails)
コード例 #2
0
 def finish(self, cursor):
     """Write the summary tables for this run and send the update e-mail.

     Collects ``self.fetchJobOffers()`` into a list, dumps it to the job
     summary table and the band-5 summary table through ``cursor`` (both
     stamped with ``self.timestamp``), then calls
     ``EMailUpdates.sendEmailUpdate()``.

     Args:
         cursor: database cursor forwarded to the ``EMailSqlDump`` helpers.

     Returns:
         True when every step succeeded. False when there were no job
         offers or any step raised an ``Exception``; the failure is logged
         together with its traceback.
     """
     try:
         jobOffers = list(self.fetchJobOffers())
         if not jobOffers:
             # Routed through the handler below so the empty case is logged
             # and reported as a failure like any other error.
             raise Exception("!No job offers!")
         EMailSqlDump.dumpJobOfferSummary(
             jobOffers, self.timestamp,
             tableName=N.JOBS_SUMMARY_TABLENAME, cursor=cursor)
         EMailSqlDump.dumpBand5OfferSummary(
             jobOffers, self.timestamp,
             tableName=N.JOBS_BAND5_SUMMARY_TABLENAME, cursor=cursor)
         EMailUpdates.sendEmailUpdate()
         logging.info("Finish, processed count: %s", self.processed_count)
         return True
     except Exception:
         # Not a bare ``except:``: KeyboardInterrupt and SystemExit must
         # propagate instead of being reported as a failed job.
         logging.exception("Failed to finish UpdateJob")
         return False
コード例 #3
0
 def process_email(self, email, cursor):
     """Store the job offers parsed out of a single e-mail.

     Offers whose ``str(uniqueURLID)`` is already a key of the mapping
     returned by ``EMailSqlDump.getPostedJobsAndCounts`` get their count
     incremented and written back via ``updateRepostedCount``; all other
     offers are inserted with ``dumpJobOffer`` using ``self.timestamp``.

     Args:
         email: object providing ``parse()``, ``dateReceivedStr`` and
             ``subject``.
         cursor: database cursor forwarded to the ``EMailSqlDump`` helpers.

     Returns:
         False when the e-mail produced no job offers (a debug message is
         logged), True otherwise.
     """
     offers = email.parse()
     if len(offers) == 0:
         received = str(email.dateReceivedStr).strip()
         subject = str(email.subject).strip()
         logging.debug("Could not find jobOffers for email at: " + received
                       + ", subject: " + subject)
         return False

     detailTable = N.JOBS_DETAIL_TABLENAME
     repostCounts = EMailSqlDump.getPostedJobsAndCounts(
         cursor, detailTable, offers)
     for offer in offers:
         urlKey = str(offer.uniqueURLID)
         if urlKey not in repostCounts:
             # First sighting of this offer: insert it.
             EMailSqlDump.dumpJobOffer(offer,
                                       tableName=detailTable,
                                       cursor=cursor,
                                       timestamp=self.timestamp)
             continue
         # Seen before: bump the count and persist the new value.
         repostCounts[urlKey] += 1
         EMailSqlDump.updateRepostedCount(cursor,
                                          detailTable,
                                          int(offer.uniqueURLID),
                                          repostCounts[urlKey])
     return True
コード例 #4
0
def downloadAvailableEMails():
    """Fetch e-mails received since the newest stored job and store their jobs.

    The resume point is ``max(pDate)`` from the job detail table, falling
    back to 2000-01-01 (UTC offset 0) when the query yields no date. The
    downloaded e-mails are pickled to a timestamped archive file under
    ``N.MAILARCHIVES``, then parsed, and all resulting job offers are passed
    to ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of e-mails downloaded; 0 means nothing new was found and
        neither an archive nor any database rows were written.
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzoffset(None, 0))

    connection = MySQLdb.connect(host=N.SQL_HOST_NAME, user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD, db=N.SQL_DB)
    # Close the cursor and connection as soon as the single query is done,
    # including when it fails.
    try:
        cursor = connection.cursor()
        try:
            # The table name comes from project constants; identifiers
            # cannot be passed as bound query parameters.
            sqlCMD = "SELECT max(pDate) from %s" % (tableName)
            cursor.execute(sqlCMD)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # An empty table still returns one row, containing None.
    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    # Archive the raw download before parsing; the context manager makes
    # sure the pickle is flushed and the handle released.
    archivePath = N.MAILARCHIVES + "MailArchive%s.pkl" % (datetime.datetime.now())
    with open(archivePath, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)

    return len(emails)
コード例 #5
0
ファイル: EMail.py プロジェクト: hosseinamin/emailJobParser
            job.URL = jobOfferParts['URL'].strip(' ')

            job.fillInMissingEntries('\n'.join(self.jobHeader))
            
            #if job.salary_lower == None:
            #    print "Error!"
            #    print '\n'.join(jobOffer)
            #    print "*"*20
            jobs.append(job)
        return jobs

if __name__ == '__main__':
    # Manual run: reuse a fixed mail archive if it exists, otherwise download
    # the e-mails once and create it; then parse everything and dump the
    # resulting job offers to SQL.
    archivePath = N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl"

    if not os.path.exists(archivePath):
        emails = list(EMailDownloader.getEmails(
            since=datetime.datetime(2012, 6, 7, 23, 24, 34)))
        print(len(emails))
        # Must be opened for binary writing: the file does not exist yet, so
        # the default read mode would raise instead of creating the archive.
        with open(archivePath, 'wb') as archiveFile:
            cPickle.dump(emails, archiveFile)

    # NOTE(review): unpickling executes arbitrary code from the file; only
    # load archives produced by this tool.
    with open(archivePath, 'rb') as archiveFile:
        emails = cPickle.load(archiveFile)

    jobs = []
    for email in emails:
        # Parse once and reuse the result for both the check and the dump.
        jobsToAdd = email.parse()
        if len(jobsToAdd) == 0:
            # Print the receive date of e-mails that yielded no jobs.
            print(email.dateReceivedStr)
        jobs.extend(jobsToAdd)
    EMailSqlDump.dumpJobOffersToSQL(jobs)