def downloadAvailableEMails():
    """Download new e-mails, archive them, and store the parsed job offers.

    The newest ``pDate`` found in the ``N.JOBS_DETAIL_TABLENAME`` table is
    used as the lower bound for the download; when the query yields no
    usable value, 2000-01-01 (zero UTC offset) is used instead.  The
    downloaded e-mails are pickled to a timestamped file below
    ``N.MAILARCHIVES``, parsed into job offers, and the offers are handed to
    ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of e-mails downloaded; 0 when there were none (in which
        case nothing is archived or written to SQL).
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzoffset(None, 0))

    connection = MySQLdb.connect(host=N.SQL_HOST_NAME, user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD, db=N.SQL_DB)
    # The connection is only needed for this one lookup, so release it (and
    # its cursor) right away, even if the query raises.
    try:
        cursor = connection.cursor()
        try:
            # The table name comes from project configuration, not from user
            # input; identifiers cannot be bound as query parameters anyway.
            cursor.execute("SELECT max(pDate) from %s" % (tableName))
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # max() over an empty table yields a single row holding NULL.
    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    # Keep a raw copy of everything downloaded before parsing it.
    # NOTE(review): str(datetime.now()) contains ':' characters, which are
    # not valid in Windows file names -- confirm the target platform.
    archivePath = N.MAILARCHIVES + "MailArchive%s.pkl" % (datetime.datetime.now())
    with open(archivePath, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)
    return len(emails)
def __init__(self, timestamp, startDate, process_step_count, interval,
             scheduler=None):
    """Initialise the job state and open the e-mail source.

    Args:
        timestamp: Value stored on the instance after conversion with
            ``int()``.
        startDate: Lower bound passed to ``EMailDownloader.getEmails`` as
            ``since``; also kept on the instance.
        process_step_count: Stored unchanged on the instance.
        interval: Forwarded to the base-class initialiser.
        scheduler: Forwarded to the base-class initialiser (default None).
    """
    super(UpdateJob, self).__init__(interval, scheduler)

    # Values taken from the caller.
    self.startDate = startDate
    self.timestamp = int(timestamp)
    self.process_step_count = process_step_count

    # Whatever getEmails returns is kept for later consumption.
    emailSource = EMailDownloader.getEmails(since=startDate)
    self.getEmailsGenerator = emailSource

    # Fresh bookkeeping: no database handle yet, nothing processed yet.
    self.db = None
    self.processed_count = 0
def downloadAvailableEMails():
    """Fetch e-mails newer than the latest stored job, archive and import them.

    Steps:
      1. Read ``max(pDate)`` from the ``N.JOBS_DETAIL_TABLENAME`` table; fall
         back to 2000-01-01 (zero UTC offset) when no value is available.
      2. Download every e-mail since that date via
         ``EMailDownloader.getEmails``.
      3. Pickle the downloaded e-mails to a timestamped file below
         ``N.MAILARCHIVES``.
      4. Parse each e-mail and pass the collected job offers to
         ``EMailSqlDump.dumpJobOffersToSQL``.

    Returns:
        The number of downloaded e-mails.  When it is 0, steps 3 and 4 are
        skipped.
    """
    tableName = N.JOBS_DETAIL_TABLENAME
    startDate = datetime.datetime(2000, 1, 1, tzinfo=dateutil.tz.tzoffset(None, 0))

    connection = MySQLdb.connect(host=N.SQL_HOST_NAME, user=N.SQL_USER_NAME,
                                 passwd=N.SQL_PASSWD, db=N.SQL_DB)
    # Close the cursor and the connection as soon as the lookup is done so
    # they are not leaked, including when the query fails.
    try:
        cursor = connection.cursor()
        try:
            # Table name originates from project configuration; SQL
            # identifiers cannot be passed as bound parameters.
            sqlCMD = "SELECT max(pDate) from %s" % (tableName)
            cursor.execute(sqlCMD)
            results = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    # An empty table produces one row whose only column is NULL/None.
    if len(results) == 1 and results[0][0] is not None:
        startDate = results[0][0]

    emails = list(EMailDownloader.getEmails(since=startDate))
    if not emails:
        return 0

    # Archive the raw download first so it survives a parsing failure.
    # NOTE(review): the timestamp in the file name contains ':' characters,
    # which Windows rejects -- confirm the target platform.
    archiveName = N.MAILARCHIVES + "MailArchive%s.pkl" % (datetime.datetime.now())
    with open(archiveName, 'wb') as archiveFile:
        cPickle.dump(emails, archiveFile)

    jobs = []
    for email in emails:
        jobs.extend(email.parse())
    EMailSqlDump.dumpJobOffersToSQL(jobs)
    return len(emails)
job.URL = jobOfferParts['URL'].strip(' ') job.fillInMissingEntries('\n'.join(self.jobHeader)) #if job.salary_lower == None: # print "Error!" # print '\n'.join(jobOffer) # print "*"*20 jobs.append(job) return jobs if __name__ == '__main__': emails = [] if not os.path.exists(N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl"): for email in EMailDownloader.getEmails(since=datetime.datetime(2012,6,7,23,24,34)): emails.append(email) print len(emails) cPickle.dump(emails, open(N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl")) emails = cPickle.load(open(N.MAILARCHIVES + "MailArchive2012-06-08 17-49-05.296393.pkl","rb")) jobs = [] for email in emails: jobsToAdd = email.parse() if len(jobsToAdd) == 0: print email.dateReceivedStr jobs.extend(email.parse()) #for job in jobs: # print job.pDate EMailSqlDump.dumpJobOffersToSQL(jobs)