def main():
    """Store one resource-usage row per entry returned by get_spiders_usage().

    Each entry is expected to expose the keys 'spider', 'cpu_usage' and
    'mem_usage'. All rows created in one run share the same timestamp and
    are committed together in a single transaction.
    """
    db_session = Session()
    try:
        spider_usage = get_spiders_usage()

        # Taken once so every row written by this run carries the same time.
        now = datetime.datetime.now()

        for data in spider_usage:
            spider = data['spider']

            usage = SpiderResourcesUsage()
            usage.spider_id = spider.id
            usage.worker_server_id = spider.worker_server_id
            usage.time = now
            usage.cpu_usage = data['cpu_usage']
            usage.mem_usage = data['mem_usage']

            db_session.add(usage)

        db_session.commit()
    finally:
        # Release the session even when collecting or committing fails.
        db_session.close()
Esempio n. 2
0
    def export_delisted_duplicate_errors(self):
        """Dump self.errors to a CSV file and register that file in the database.

        The file is written to DATA_DIR and named
        '<website_id>_<crawl_id>_delisted_duplicate_errors.csv'. The
        DelistedDuplicateError row for this website/crawl pair is reused when
        one already exists, otherwise a new one is created; in both cases it
        ends up pointing at the file name (not the full path).
        """
        website_id = self.current_crawl.spider.website_id
        crawl_id = self.current_crawl.id
        filename = '%s_%s_delisted_duplicate_errors.csv' % (website_id, crawl_id)
        filename_full = os.path.join(DATA_DIR, filename)
        # The builtin str is what the old pd.np.str alias pointed to.
        errors_df = pd.DataFrame(self.errors, dtype=str)
        try:
            errors_df.to_csv(filename_full, index=False, encoding='utf-8')
        except Exception:
            # Best-effort fallback: retry with the default encoding, but do not
            # swallow KeyboardInterrupt/SystemExit like a bare except would.
            errors_df.to_csv(filename_full, index=False)

        db_session = Session()
        try:
            dd_error = db_session.query(DelistedDuplicateError)\
                .filter(DelistedDuplicateError.website_id == website_id,
                        DelistedDuplicateError.crawl_id == crawl_id)\
                .first()
            if not dd_error:
                dd_error = DelistedDuplicateError()
            dd_error.website_id = website_id
            dd_error.crawl_id = crawl_id
            dd_error.filename = filename
            db_session.add(dd_error)
            db_session.commit()
        finally:
            # Release the session even when the query or commit fails.
            db_session.close()
def _build_upload_message(upload, subject_template, intro):
    """Return the (subject, text) pair for one spider-upload notification.

    subject_template must contain a single %s placeholder, which is filled
    with the upload's spider name. intro is the first line of the body.
    """
    subject = subject_template % upload.spider_name
    account_name = upload.account.name if upload.account else 'New account'
    parts = [
        intro,
        'Account: %s\n' % account_name,
        'Spider: %s\n' % upload.spider_name,
    ]
    if upload.notes:
        parts.append('Notes: %s' % upload.notes)
    return subject, ''.join(parts)


def main():
    """Send the pending e-mail notification, if any, for every spider upload.

    For each SpiderUpload whose user has an e-mail address, the scheduler is
    asked in order whether an initial, final or reminder message is due, and
    at most one of them is sent. Initial and final messages also go to the
    addresses in TO; reminders go to the assigned user only. After sending,
    the upload's last_notification is set to the current time.
    """
    db_session = Session()
    try:
        scheduler = SpiderUploadNotificationScheduler()
        notifier = EmailNotifier(SMTP_USER, SMTP_PASS, SMTP_FROM, SMTP_HOST, SMTP_PORT)

        for upload in db_session.query(SpiderUpload).all():
            owner_email = upload.user.email
            if not owner_email:
                continue

            is_initial = scheduler.should_send_initial(upload)
            if is_initial:
                subject_template = 'Spider upload request %s'
                intro = 'A spider upload has been assigned to you:\n'
                recipients = [owner_email] + TO
            elif scheduler.should_send_final(upload):
                subject_template = 'Spider deployed %s'
                intro = 'The following spider has been deployed:\n'
                recipients = [owner_email] + TO
            elif scheduler.should_send_reminder(upload):
                subject_template = 'Spider upload reminder %s'
                intro = 'The following spider has been assigned to you:\n'
                recipients = [owner_email]
            else:
                subject_template = None

            if subject_template is not None:
                subject, text = _build_upload_message(upload, subject_template, intro)
                notifier.send_notification(recipients, subject, text)
                if is_initial:
                    # Single-argument form prints the same on Python 2 and 3.
                    print(owner_email)
                upload.last_notification = datetime.now()
                db_session.add(upload)

            # Committed per upload, so timestamps already recorded are kept
            # if a later upload fails.
            db_session.commit()
    finally:
        # Release the session even when sending or committing fails.
        db_session.close()
from sqlalchemy.sql import text

# Absolute directory of this script, its parent, and its grandparent.
HERE = os.path.abspath(os.path.dirname(__file__))
product_spiders_root = os.path.split(HERE)[0]
project_root = os.path.split(product_spiders_root)[0]

# Make the grandparent directory and its 'product_spiders' subdirectory
# importable before the project imports that follow.
sys.path.extend([
    project_root,
    os.path.join(project_root, 'product_spiders'),
])

from product_spiders.db import Session
from scrapy.utils.misc import walk_modules
from scrapy.utils.spider import iter_spider_classes
from productspidersweb.models import Spider

# Single-argument form prints the same on Python 2 and 3.
print(sys.path)
here = os.path.abspath(os.path.dirname(__file__))

db_session = Session()

# Packages that are walked for spider classes.
spider_modules = ['product_spiders.spiders']

try:
    for name in spider_modules:
        for module in walk_modules(name):
            for spider in iter_spider_classes(module):
                # Match the spider class to its database row by name; classes
                # without a row are skipped.
                sp = db_session.query(Spider).filter(
                    Spider.name == spider.name).first()
                if sp:
                    sp.module = str(spider.__module__)
                    db_session.add(sp)

    # All module updates are written in one transaction.
    db_session.commit()
finally:
    # Release the session even when module discovery or the commit fails.
    db_session.close()