Beispiel #1
0
                            if db:
                                try:
                                    db.execute(sql)
                                except:
                                    logger.warn(
                                        "insert failed sql %s --> err %s", sql,
                                        traceback.format_exc())
                            if csv_w:
                                writecsv(csv_w, [
                                    d,
                                    m.get('outer_code',
                                          ''), m['commission_rate'],
                                    m['item_title'], m['seller_nick'],
                                    m['num_iid'], m['shop_title'],
                                    m['app_key'], m['commission'],
                                    m['trade_id'], m['pay_time'],
                                    m['item_num'], m['category_id'],
                                    m['pay_price'], m['real_pay_fee'],
                                    m.get('category_name', '')
                                ])
                        except:
                            logger.error("Got error %s %s", m,
                                         traceback.format_exc())
        except:
            logger.error("Got fatal error %s %s", d, traceback.format_exc())


if __name__ == "__main__":
    # Script entry point: call log_init for the "TaobaoLogger" logger, then
    # run main().
    # NOTE(review): the second log_init argument looks like a logger-name
    # pattern ("sqlalchemy.*") - confirm its meaning in pygaga.helpers.logger.
    log_init("TaobaoLogger", "sqlalchemy.*")
    main()
Beispiel #2
0
                        loan_obj.rate = str(loan.xpath("td[4]/text()")[0]).strip().replace("%", "")
                        period = str(loan.xpath("td[5]/text()")[0].encode("UTF-8")).strip().replace(" ", "")
                        if period.find(loan_obj.PERIOD_UNIT_DAY) > 0:
                            loan_obj.period = period.replace(loan_obj.PERIOD_UNIT_DAY, "")
                            loan_obj.period_unit = loan_obj.PERIOD_UNIT_DAY
                        else:
                            loan_obj.period = period.replace("个", "").replace(loan_obj.PERIOD_UNIT_MONTH, "")
                            loan_obj.period_unit = loan_obj.PERIOD_UNIT_MONTH

                        loan_obj.schedule = float(str(loan.xpath("td[last()]/p[1]/text()")[0].encode("UTF-8")).strip().replace(" ", "").replace("%", "").split("完成")[1])

                        loan_obj.db_create(db)

        logger.info("company %s crawler loan: new size %s, update size %s", company_id, len(new_ids_set), len(update_ids_set))

        # db - 新抓取的 = 就是要下线的
        off_ids_set = db_ids_set - online_ids_set
        if off_ids_set:
            loan_obj = Loan(company_id)
            loan_obj.db_offline(db, off_ids_set)
            logger.info("company %s crawler loan: offline %s", company_id, len(off_ids_set))

    except:
        logger.error("url: %s xpath failed:%s", url, traceback.format_exc())


if __name__ == "__main__":
    # Script entry point: call log_init for "CrawlLogger", then start crawl().
    log_init("CrawlLogger", "sqlalchemy.*")

    crawl()
Beispiel #3
0

if __name__ == "__main__":
    # Entry point of the web.py application: define the flags, initialise
    # logging, then start the app.
    gflags.DEFINE_boolean('daemon', False, "is start in daemon mode?")
    gflags.DEFINE_boolean('webdebug', False, "is web.py debug")
    gflags.DEFINE_boolean('reload', False, "is web.py reload app")
    gflags.DEFINE_string('qqhost', 'test.qq.com', "fake qq host")
    gflags.DEFINE_string('dumpcookiepath', '/tmp/qq_cookie.txt',
                         "dump cookie path")
    gflags.DEFINE_integer('qqport', 8025, "fake qq port")

    # argv[1] is hidden while log_init runs and restored afterwards.
    # NOTE(review): presumably argv[1] is the web.py listen address and
    # log_init parses the remaining flags - confirm.
    backup_args = list(sys.argv)
    sys.argv = [sys.argv[0]] + sys.argv[2:]
    log_init('QzoneLogger', "sqlalchemy.*")
    # From here on only the program name and the first argument remain.
    sys.argv = backup_args[:2]

    web.config.debug = FLAGS.webdebug
    if len(sys.argv) != 1:
        # The first argument is stored as-is (a string) in the qqport flag.
        FLAGS.qqport = sys.argv[1]
    else:
        # No positional argument: route web.py's runwsgi through runfcgi.
        web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)

    if FLAGS.daemon:
        # Use the pidfile flag when set, otherwise a default under file_path.
        pidfile = FLAGS.pidfile or os.path.join(file_path,
                                                'qq_login_proxy.pid')
        daemon.daemonize(pidfile)

    app = web.application(urls, globals(), autoreload=FLAGS.reload)
    app.run()
Beispiel #4
0
    db.query("set autocommit=0;")
    for row in results.fetch_row(maxrows=0):
        item_id = row[0]
        result = row[1]
        is_image_crawled = row[2]
        i += 1
        if result == 1 and is_image_crawled == 1:
            try_query(db,
                      "update item set crawl_status=2 where id=%s" % item_id)
        if result == 1 and is_image_crawled == 0:
            try_query(db,
                      "update item set crawl_status=1 where id=%s" % item_id)
        if result == 0:
            try_query(db,
                      "update item set crawl_status=0 where id=%s" % item_id)
        if i % 1000 == 0:
            logger.debug("processing %s %s %s/%s", row[3], item_id, i, 1194351)
            db.commit()
    db.commit()
    db.close()


if __name__ == "__main__":
    # Script entry point.
    # usage:  ip:port --daemon --stderr ...
    # The 'daemon' flag is defined here, but the daemonize call below is
    # commented out, so the flag currently has no effect in this block.
    gflags.DEFINE_boolean('daemon', False, "is start in daemon mode?")
    log_init('AppLogger', "sqlalchemy.*")
    #if FLAGS.daemon:
    #    file_path = os.path.split(os.path.abspath(__file__))[0]
    #    daemon.daemonize(os.path.join(file_path, 'app.pid'))
    main()
Beispiel #5
0
from pygaga.helpers.lock import lock
from pygaga.helpers.logger import log_init

if __name__ == "__main__":
    # Manual demo of the pygaga lock helper: prints a message before, inside
    # and after a `with lock("mylock")` block (Python 2 print statements).
    log_init("Logger")
    print "entering lock"
    with lock("mylock"):
        print "running"
    print "done"
Beispiel #6
0
                            "delete from tbk_item_convert where item_id=%s" %
                            id)
                except KeyboardInterrupt:
                    raise
                except Exception, e:
                    logger.debug("in %s out %s" % (numid2id, result))
                    logger.warn("convert failed %s %s" %
                                (sql, traceback.format_exc()))
        except KeyboardInterrupt:
            raise
        except:
            logger.warn("process failed %s %s reason %s" %
                        (input, output, traceback.format_exc()))
    logger.info("Convert result %s - %s", converted, total)


if __name__ == "__main__":
    # Entry point: dispatch on the --action flag.
    #   'remove' -> rollback_shop, 'update' -> update_shop, applied to every
    #               shop through do_all() when --all is set, otherwise to
    #               FLAGS.shop only
    #   'vip'    -> update_vip_shop(FLAGS.shop)
    # Any other action value does nothing.
    log_init(['TaobaokeLogger', 'TaobaoLogger'], "sqlalchemy.*")
    if FLAGS.action == 'vip':
        update_vip_shop(FLAGS.shop)
    elif FLAGS.action in ('remove', 'update'):
        _shop_op = rollback_shop if FLAGS.action == 'remove' else update_shop
        if FLAGS.all:
            do_all(_shop_op)
        else:
            _shop_op(FLAGS.shop, None)
Beispiel #7
0
    def test():
        """Round-trip check of ShopExtendInfo persistence for shop 10000001.

        Four passes run in order: create, partial modification, full
        modification, and a second full modification that uses 100 / -100
        values and also covers the shipping_remark fields. Each pass builds a
        ShopExtendInfo for the shop id, assigns the listed attributes in
        order, calls save(), then builds a second instance for the same id
        and asserts that every attribute reads back as expected.
        """
        log_init("CrawlLogger", "sqlalchemy.*")
        db = get_db_engine()
        shop_id = 10000001

        def same(attr, value):
            # Attribute expected to read back exactly as it was written.
            return (attr, value, value)

        def opened(text, expected_date):
            # open_at is written as a datetime and compared against a date.
            written = datetime.datetime.strptime(text, "%Y-%m-%d")
            return ("open_at", written, expected_date)

        def save_and_verify(fields):
            # fields: ordered (attribute, written value, expected value).
            shop = ShopExtendInfo(db, shop_id)
            for attr, written, _ in fields:
                setattr(shop, attr, written)
            shop.save()

            reloaded = ShopExtendInfo(db, shop_id)
            for attr, _, expected in fields:
                assert_equal(getattr(reloaded, attr), expected)

        # Test create
        save_and_verify([
            same("main_category", "服饰箱包"),
            same("location", "浙江杭州"),
            same("good_item_rate", 99.98),
            same("described_remark", 4.8),
            same("described_remark_compare", 32.13),
            same("service_remark", 4.6),
            same("service_remark_compare", -15.20),
            same("support_returnin7day", 0),
            same("support_cash", 1),
            same("support_consumer_guarantees", 1),
            same("support_credit_card", 0),
            opened("2013-1-1", datetime.date(2013, 1, 1)),
            same("favorited_user_count", 1234567890),
        ])

        # Test partial modification
        save_and_verify([
            same("main_category", "服饰箱包TEST"),
            same("location", "浙江杭州TEST"),
            same("good_item_rate", 10.98),
            same("described_remark", 3.8),
            same("described_remark_compare", -32.13),
            same("service_remark", 3.6),
        ])

        # Test full modification
        save_and_verify([
            same("main_category", "服饰箱包Test2"),
            same("location", "浙江杭州Test2"),
            same("good_item_rate", 13.98),
            same("described_remark", 4.7),
            same("described_remark_compare", 10.13),
            same("service_remark", 4.8),
            same("service_remark_compare", -12.20),
            same("support_returnin7day", 1),
            same("support_cash", 1),
            same("support_consumer_guarantees", 0),
            same("support_credit_card", 1),
            opened("2013-2-1", datetime.date(2013, 2, 1)),
            same("favorited_user_count", 1234567891),
        ])

        # Test full modification with 100 / -100 values and shipping remarks
        save_and_verify([
            same("main_category", "服饰箱包Test2"),
            same("location", "浙江杭州Test2"),
            same("good_item_rate", 100.00),
            same("described_remark", 4.7),
            same("described_remark_compare", 100.00),
            same("service_remark", 4.8),
            same("service_remark_compare", -100.00),
            same("shipping_remark", 5.0),
            same("shipping_remark_compare", -100.00),
            same("support_returnin7day", 1),
            same("support_cash", 1),
            same("support_consumer_guarantees", 0),
            same("support_credit_card", 1),
            opened("2013-2-1", datetime.date(2013, 2, 1)),
            same("favorited_user_count", 1234567891),
        ])
Beispiel #8
0
# Command-line flags, then the script entry point.

# Which items to crawl and how.
gflags.DEFINE_integer('itemid', 0, "crawl item id")
gflags.DEFINE_integer('shopid', 0, "crawl shop id")
gflags.DEFINE_integer('limit', 0, "limit crawl items count")
gflags.DEFINE_integer('interval', 0, "crawl interval between items in ms")
gflags.DEFINE_string('where', "", "additional where sql, e.g. a=b and c=d")
gflags.DEFINE_boolean('all', False, "crawl all items")
gflags.DEFINE_boolean('pending', False, "crawl pending items")
gflags.DEFINE_boolean('changed', False, "crawl items that changed recent")
gflags.DEFINE_boolean('commit', True, "is commit data into database?")
gflags.DEFINE_boolean('force', False, "is crawl offline items?")
gflags.DEFINE_boolean('debug_parser', False, "debug html parser?")

# What to update per item (comments, main data) and HTML dumping.
gflags.DEFINE_boolean('update_comments', False, "is update comments?")
gflags.DEFINE_integer('max_comments', 0, "max comments crawled")
gflags.DEFINE_boolean('update_main', True, "is update price, desc and images?")
gflags.DEFINE_boolean('dump', False, "dump html content?")

# Redis connection and cleanup switch.
gflags.DEFINE_boolean('clean_redis', False,
                      "is clean redis comments then recrawl?")
gflags.DEFINE_string('redishost', "127.0.0.1", "redis host")
gflags.DEFINE_integer('redisport', 9089, "redis port")

# item_hotest comment update options.
gflags.DEFINE_boolean('hotest', False,
                      "is update hsdl-guang-bi-db1 item_hotest comments?")
gflags.DEFINE_string('bihost', "127.0.0.1", "bi1 host")
gflags.DEFINE_integer('mostPage', 20, "comment most page")

if __name__ == "__main__":
    # Entry point: call log_init for the two logger names, then run the
    # crawler.
    log_init(["CrawlLogger", "urlutils"], "sqlalchemy.*")
    crawl_item_main()
                                continue
                            sql = """insert into taobao_report (outer_code, commission_rate, item_title, seller_nick,
                                num_iid, shop_title, app_key, commission, trade_id, pay_time, item_num,
                                category_id, pay_price, real_pay_fee, category_name, create_time) values (
                                "%s", "%s", "%s", "%s", %s, "%s", "%s", "%s", %s, "%s", %s, %s, "%s", "%s", "%s", now()
                                )""" % (
                                m.get('outer_code', ''), m['commission_rate'].replace('%', '%%'), m['item_title'].replace('%', '%%'),
                                m['seller_nick'].replace('%', '%%'), m['num_iid'],
                                m['shop_title'].replace('%', '%%'), m['app_key'], m['commission'], m['trade_id'], m['pay_time'], m['item_num'],
                                m['category_id'], m['pay_price'], m['real_pay_fee'], m.get('category_name','').replace('%', '%%')
                                )
                            logger.debug(sql)
                            if db:
                                try:
                                    db.execute(sql)
                                except:
                                    logger.warn("insert failed sql %s --> err %s", sql, traceback.format_exc())
                            if csv_w:
                                writecsv(csv_w, [d, m.get('outer_code', ''), m['commission_rate'], m['item_title'], m['seller_nick'], m['num_iid'],
                                    m['shop_title'], m['app_key'], m['commission'], m['trade_id'], m['pay_time'], m['item_num'],
                                    m['category_id'], m['pay_price'], m['real_pay_fee'], m.get('category_name', '')])
                        except:
                            logger.error("Got error %s %s", m, traceback.format_exc())
                pageno += 1
        except:
            logger.error("Got fatal error %s %s", d, traceback.format_exc())

if __name__ == "__main__":
    # Script entry point: call log_init for "TaobaoLogger", then run main().
    log_init("TaobaoLogger", "sqlalchemy.*")
    main()
    for batch_item in results:
        for iid, item in batch_item.items.items():
            try:
                counter += 1
                item_id = item['req'][0]
                item_iid = item['req'][1]
                item_price = item['req'][2]
                #item_picurl = item['req'][3]
                if item['resp']:
                    if item['resp']['approve_status'] != 'onsale':
                        logger.debug("Item %s/%s %s %s is offshelf", counter, total, item_id, item_iid)
                        off_counter += 1
                        write_db.execute("update item set status=2, modified=now()  where id=%s" % item_id)
                    else:
                        price = float(item['resp']['price'])
                        #title = item['resp']['title']
                        #pic_url = item['resp']['pic_url']
                        if abs(item_price - price) / (item_price + 0.0000001) > 0.2 or abs(item_price - price) > 2.0:
                            change_counter += 1
                            logger.debug("Item %s/%s %s %s price %s -> %s", counter, total, item_id, item_iid, item_price, price)
                            if FLAGS.commit_price:
                                write_db.execute("update item set price=%s where id=%s" % (price, item_id))
                logger.debug("req %s resp %s", item['req'], item['resp'])
            except:
                logger.error("update failed %s", traceback.format_exc())
    logger.info("Taobao quickupdate, total %s, off %s, price change %s, volume change %s", total, off_counter, change_counter, vol_change_counter)

if __name__ == "__main__":
    # Script entry point: call log_init for both logger names, then run
    # crawl_main().
    log_init(['CrawlLogger', 'TaobaoLogger',], "sqlalchemy.*")
    crawl_main()
        logger.info("crawled %s len %s", url, len(data))
    except KeyboardInterrupt:
        raise
    except:
        logger.warn("crawl failed %s exception %s", url, traceback.format_exc())

def crawl_page(item_id, url, headers):
    """Download ``url`` and return the response body, or "" on failure.

    Args:
        item_id: identifier used only in log messages.
        url: address to fetch.
        headers: dict of HTTP request headers passed to urllib2.Request.

    Returns:
        The raw response body, or the empty string when the download fails
        with ValueError, HTTPError, URLError or socket.timeout. These errors
        are logged at info level; HTTP 404 responses are not logged.
    """
    logger.debug("Crawling %s", url)
    data = ""
    try:
        req = urllib2.Request(url, headers=headers)
        u = urllib2.urlopen(req)
        try:
            data = u.read()
        finally:
            # Release the connection even when read() raises (for example a
            # socket timeout in the middle of the body).
            u.close()
    except ValueError as e:
        logger.info("download %s:%s url value error %s", item_id, url, e.message)
    except HTTPError as e1:
        # Must be caught before URLError: HTTPError is a URLError subclass.
        if e1.code != 404:
            logger.info("download %s:%s failed http code: %s", item_id, url, e1.code)
    except URLError as e2:
        logger.info("download %s:%s failed url error: %s", item_id, url, e2.reason)
    except socket.timeout:
        logger.info("download %s:%s failed socket timeout", item_id, url)
    return data

if __name__ == "__main__":
    # Script entry point: call log_init for "MeiliCrawlLogger", then run
    # crawl_all().
    log_init("MeiliCrawlLogger")

    crawl_all()

Beispiel #12
0
from pygaga.helpers.dbutils import get_db_engine
from pygaga.helpers.urlutils import download

# Host of the Solr server queried by main(); the port is fixed there.
gflags.DEFINE_string('solr_host', '192.168.10.78', 'solr host')

FLAGS = gflags.FLAGS

logger = logging.getLogger("statslogger")

# URL-encoded Solr select request (path and query string only). Decoded:
#   q=*:*
#   fq=item_id:[0 TO *]
#   fq=term_parent_cid:3 OR term_parent_cid:4 OR term_parent_cid:5 OR term_parent_cid:6
#   fq=lctr_s2_2:*
#   start=0, rows=120, sort=lctr_s2_2 desc, wt=json, version=2
SOLR_URL = "/solr/select?q=*%3A*&fq=item_id%3A%5B0+TO+*%5D&fq=term_parent_cid%3A3+OR+term_parent_cid%3A4+OR+term_parent_cid%3A5+OR+term_parent_cid%3A6+&fq=lctr_s2_2%3A*&start=0&rows=120&sort=lctr_s2_2+desc&wt=json&version=2"

def main():
    """Log summary statistics of favourite counts for the Solr result items.

    Fetches the documents returned by SOLR_URL from FLAGS.solr_host (port
    7080), runs one count query on the favourite table per document, and
    logs ``Series(counts).describe()``.
    """
    solr_url = "http://%s:7080%s" % (FLAGS.solr_host, SOLR_URL)
    payload = simplejson.loads(download(solr_url))
    db = get_db_engine()
    fav_counts = []
    for doc in payload['response']['docs']:
        rows = db.execute("select count(id) from favourite where itemid=%s and acttime>'2012-12-01' and favstatus=1 and firstchoose=0;" % doc['item_id'])
        # Record 0 when the result reports no rows.
        fav_counts.append(list(rows)[0][0] if rows.rowcount else 0)
    logger.info(Series(fav_counts).describe())

if __name__ == '__main__':
    # Script entry point: call log_init for both logger names, then run
    # main().
    log_init(['statslogger','urlutils'], "sqlalchemy.*")
    main()
Beispiel #13
0
                item_matched)
    return click_items, paid_items


def load_pay_items(paid_items):
    """Load one PayItemType record per taobao_report id in ``paid_items``.

    Each id is looked up with a join of taobao_report, item and shop. The
    first result row is wrapped in a PayItemType namedtuple; ids without a
    result are logged as warnings and skipped.

    Args:
        paid_items: iterable of taobao_report ids.

    Returns:
        List of PayItemType namedtuples, in the order of ``paid_items``.
    """
    logger.info("Loading pay items")
    pay_item_type = namedtuple(
        'PayItemType',
        'created name num_id shop_id shop_name count price total_price comm_rate total_comm status order_id'
    )
    query = "select create_time,item_title,num_iid,shop.id,shop.taobao_title,item_num,real_pay_fee,pay_price,commission_rate,commission,taobao_report.status,trade_id from taobao_report,item,shop where item.shop_id=shop.id and cast(taobao_report.num_iid as char)=item.num_id and taobao_report.id=%s"

    engine = get_db_engine()
    loaded = []
    for report_id in paid_items:
        rows = engine.execute(query % report_id)
        if not rows.rowcount:
            logger.warn("not faound taobaoreport %s", report_id)
            continue
        # Only the first matching row is used.
        first_row = list(rows)[0]
        loaded.append(pay_item_type(*first_row))
    return loaded


if __name__ == '__main__':
    # Script entry point: call log_init for both logger names, then run
    # est_main().
    log_init(["GuangLogger", "urlutils"], "sqlalchemy.*")
    est_main()
Beispiel #14
0
#!/Library/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python
# coding: utf-8

import gflags
import sys

from pygaga.helpers.logger import log_init
from guang_crawler.crawl_image_impl import crawl_image_main

# Command-line flags, then the script entry point.

# Image directories.
gflags.DEFINE_string('path', "/space/wwwroot/image.guang.j.cn/ROOT/images/", "image path")
gflags.DEFINE_string('org_path', "/space/wwwroot/image.guang.j.cn/ROOT/org_images/", "org image path")
gflags.DEFINE_string('crawl_path', "/space/crawler/image_crawler/static", "image path")
# Which items to crawl.
gflags.DEFINE_integer('itemid', 0, "crawl item id")
gflags.DEFINE_integer('numid', 0, "crawl item num id")
gflags.DEFINE_integer('limit', 0, "limit crawl items count")
gflags.DEFINE_string('where', "", "additional where sql, e.g. a=b and c=d")
gflags.DEFINE_boolean('all', False, "crawl all items")
gflags.DEFINE_boolean('pending', False, "crawl pending items")
# Behaviour switches.
gflags.DEFINE_boolean('commit', True, "is commit data into database?")
gflags.DEFINE_boolean('removetmp', False, "is remove temperary image files after crawl?")
gflags.DEFINE_boolean('force', False, "is force crawl?")
# Upload flags below are disabled.
#gflags.DEFINE_boolean('uploadfastdfs', True, "is upload to fastdfs?")
#gflags.DEFINE_boolean('uploadnfs', False, "is upload to nfs?")
#gflags.DEFINE_boolean('uploadorg', True, "is upload origin image to nfs?")

if __name__ == "__main__":
    # Entry point: call log_init for both logger names, then run
    # crawl_image_main().
    log_init(["CrawlLogger","urlutils"], "sqlalchemy.*")
    crawl_image_main()

Beispiel #15
0
    except:
        logger.error("unknown error %s, %s", item, traceback.format_exc())


def crawl_page(item_id, url, headers):
    """Download ``url`` and return the response body, or "" on failure.

    Args:
        item_id: identifier used only in log messages.
        url: address to fetch.
        headers: dict of HTTP request headers passed to urllib2.Request.

    Returns:
        The raw response body, or the empty string when the download fails
        with ValueError, HTTPError, URLError or socket.timeout. Each of these
        errors is logged at info level.
    """
    logger.debug("Crawling %s", url)
    data = ""
    try:
        req = urllib2.Request(url, headers=headers)
        u = urllib2.urlopen(req)
        try:
            data = u.read()
        finally:
            # Release the connection even when read() raises (for example a
            # socket timeout in the middle of the body).
            u.close()
    except ValueError as e:
        logger.info("download %s:%s url value error %s", item_id, url,
                    e.message)
    except HTTPError as e1:
        # Must be caught before URLError: HTTPError is a URLError subclass.
        logger.info("download %s:%s failed http code: %s", item_id, url,
                    e1.code)
    except URLError as e2:
        logger.info("download %s:%s failed url error: %s", item_id, url,
                    e2.reason)
    except socket.timeout:
        logger.info("download %s:%s failed socket timeout", item_id, url)
    return data


if __name__ == "__main__":
    # Script entry point: call log_init for "ProcessItemLogger", then run
    # process_all_items().
    log_init("ProcessItemLogger")

    process_all_items()
Beispiel #16
0
#!/Library/Frameworks/Python.framework/Versions/2.7/Resources/Python.app/Contents/MacOS/Python
# coding: utf-8

import gflags
import sys

from pygaga.helpers.logger import log_init
from guang_crawler.fix_thumb_impl import fix_thumb_main

# Command-line flags, then the script entry point.

# Image directories.
gflags.DEFINE_string('path', "/space/wwwroot/image.guang.j.cn/ROOT/images/", "image path")
gflags.DEFINE_string('org_path', "/space/wwwroot/image.guang.j.cn/ROOT/org_images/", "org image path")
gflags.DEFINE_string('crawl_path', "/space/crawler/image_crawler/static", "image path")
# Which items to process.
gflags.DEFINE_integer('itemid', 0, "crawl item id")
gflags.DEFINE_integer('limit', 0, "limit crawl items count")
gflags.DEFINE_string('where', "", "additional where sql, e.g. a=b and c=d")
gflags.DEFINE_boolean('all', False, "crawl all items")
# Behaviour switches.
gflags.DEFINE_boolean('removetmp', False, "is remove temperary image files after crawl?")
gflags.DEFINE_boolean('force', False, "is force crawl?")

if __name__ == "__main__":
    # Entry point: call log_init for "CrawlLogger", then run fix_thumb_main().
    log_init('CrawlLogger', "sqlalchemy.*")
    fix_thumb_main()

    i = 0
    db.autocommit(False)
    db.query("set autocommit=0;")
    for row in results.fetch_row(maxrows=0):
        item_id = row[0]
        result = row[1]
        is_image_crawled = row[2]
        i += 1
        if result == 1 and is_image_crawled == 1:
            try_query(db, "update item set crawl_status=2 where id=%s" % item_id)
        if result == 1 and is_image_crawled == 0:
            try_query(db, "update item set crawl_status=1 where id=%s" % item_id)
        if result == 0:
            try_query(db, "update item set crawl_status=0 where id=%s" % item_id)
        if i % 1000 == 0:
            logger.debug("processing %s %s %s/%s", row[3], item_id, i, 1194351)
            db.commit()
    db.commit()
    db.close()

if __name__ == "__main__":
    # Script entry point.
    # usage:  ip:port --daemon --stderr ...
    # The 'daemon' flag is defined here, but the daemonize call below is
    # commented out, so the flag currently has no effect in this block.
    gflags.DEFINE_boolean('daemon', False, "is start in daemon mode?")
    log_init('AppLogger', "sqlalchemy.*")
    #if FLAGS.daemon:
    #    file_path = os.path.split(os.path.abspath(__file__))[0]
    #    daemon.daemonize(os.path.join(file_path, 'app.pid'))
    main()

Beispiel #18
0
    for page in waitlimit(calllimit, 60.0, xrange(1, page_count)):
        items = get_items(top, nick, page, page_size, expire)
        yield items

def crawl_rates(top, items, page_size=40, calllimit=60, expire=600.0):
    for item in waitlimit(calllimit, 60.0, items):
        page = 0
        while True:
            page += 1
            rates = get_rate(top, item[2], item[1], page, page_size)
            if not rates:
                break
            yield rates
"""

if __name__ == '__main__':
    # Manual smoke test: call log_init, then print the result of one API
    # call (Python 2 print statement).
    log_init("TaobaoLogger")
    # The triple-quoted string below is a bare expression statement used to
    # keep earlier ad-hoc calls disabled; it is never executed as code.
    """
    print list(get_taobao_items(get_top(), ["19555209099",]))
    #time.sleep(1)
    print get_promotion_info(get_top(), "23476128281")
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-22')
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-23')
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-22', True, True)
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-23', True, True)
    #get_taobao_cates(get_taobaoke_top())
    #get_taobao_trade(get_rand_top(), '207801937350421')
    """

    # The only active call: item categories for id "50124001".
    print get_taobao_itemcats(get_top(), "50124001")
Beispiel #19
0

# Register my_filter in ENV's filter table under the name 'my_filter'.
# NOTE(review): ENV is presumably a template environment - confirm where it
# is defined.
ENV.filters['my_filter'] = my_filter


class home:
    """Placeholder request handler whose GET returns a fixed string."""

    def GET(self):
        """Return the literal body 'test'."""
        body = 'test'
        return body


if __name__ == "__main__":
    # Entry point of the web.py application.
    # usage: ${prog} ip:port --daemon --stderr ...
    gflags.DEFINE_boolean('daemon', False, "is start in daemon mode?")
    gflags.DEFINE_boolean('webdebug', False, "is web.py debug")
    gflags.DEFINE_boolean('reload', False, "is web.py reload app")

    # argv[1] is hidden while log_init runs and restored afterwards.
    # NOTE(review): presumably argv[1] is the ip:port for web.py and log_init
    # parses the remaining flags - confirm.
    backup_args = list(sys.argv)
    sys.argv = [sys.argv[0]] + sys.argv[2:]
    log_init('WebLogger', "sqlalchemy.*")
    # From here on only the program name and the first argument remain.
    sys.argv = backup_args[:2]

    web.config.debug = FLAGS.webdebug
    if len(sys.argv) == 1:
        # No positional argument: route web.py's runwsgi through runfcgi.
        web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)
    if FLAGS.daemon:
        pid_path = os.path.join(file_path, 'web.pid')
        daemon.daemonize(pid_path)

    app = web.application(urls, globals(), autoreload=FLAGS.reload)
    app.run()
Beispiel #20
0
        return ""

class palette_png:
    def GET(self):
        web.header("Content-Type", "images/png")
        params = web.input()
        data = StringIO()
        colors_as_image(params.c.split(",")).save(data, 'png')
        data.seek(0)
        return data.read()
"""

if __name__ == "__main__":
    # Entry point of the web.py application.
    gflags.DEFINE_boolean('daemon', False, "is start in daemon mode?")
    gflags.DEFINE_boolean('webdebug', False, "is web.py debug")
    gflags.DEFINE_boolean('reload', False, "is web.py reload app")
    gflags.DEFINE_string('solr_host', 'sdl-guang-solr4', 'solr host')

    # argv[1] is hidden while log_init runs and restored afterwards.
    # NOTE(review): presumably argv[1] is the listen address for web.py and
    # log_init parses the remaining flags - confirm.
    backup_args = list(sys.argv)
    sys.argv = [sys.argv[0]] + sys.argv[2:]
    log_init('GuangLogger', "sqlalchemy.*")
    # From here on only the program name and the first argument remain.
    sys.argv = backup_args[:2]

    web.config.debug = FLAGS.webdebug
    if len(sys.argv) == 1:
        # No positional argument: route web.py's runwsgi through runfcgi.
        web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)
    if FLAGS.daemon:
        pid_path = os.path.join(file_path, 'solrweb.pid')
        daemon.daemonize(pid_path)

    app = web.application(urls, globals(), autoreload=FLAGS.reload)
    app.run()
Beispiel #21
0
    for page in waitlimit(calllimit, 60.0, xrange(1, page_count)):
        items = get_items(top, nick, page, page_size, expire)
        yield items

def crawl_rates(top, items, page_size=40, calllimit=60, expire=600.0):
    for item in waitlimit(calllimit, 60.0, items):
        page = 0
        while True:
            page += 1
            rates = get_rate(top, item[2], item[1], page, page_size)
            if not rates:
                break
            yield rates
"""

if __name__ == '__main__':
    # Manual smoke test: call log_init, then print the result of one API
    # call (Python 2 print statement).
    log_init("TaobaoLogger")
    # The triple-quoted string below is a bare expression statement used to
    # keep earlier ad-hoc calls disabled; it is never executed as code.
    """
    print list(get_taobao_items(get_top(), ["19555209099",]))
    #time.sleep(1)
    print get_promotion_info(get_top(), "23476128281")
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-22')
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-23')
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-22', True, True)
    print get_spmeffect_trade(get_taobaoke_top(), '2013-04-23', True, True)
    #get_taobao_cates(get_taobaoke_top())
    #get_taobao_trade(get_rand_top(), '207801937350421')
    """

    # The only active call: item categories for id "50124001".
    print get_taobao_itemcats(get_top(), "50124001")
Beispiel #22
0
from pygaga.helpers.lock import lock
from pygaga.helpers.logger import log_init

if __name__ == "__main__":
    # Manual demo of the pygaga lock helper: prints a message before, inside
    # and after a `with lock("mylock")` block (Python 2 print statements).
    log_init("Logger")
    print "entering lock"
    with lock("mylock"):
         print "running"
    print "done"

        #import pdb; pdb.set_trace()
        numid2volume[long(num_id)] = click_item.item_volume
        click_hash = 'jn%s' % click_item.click_hash
        r2 = db.execute('select 1 from taobao_report where outer_code="%s"' % click_hash)
        if r2.rowcount:
            outercode_matched += 1
    logger.info("Total click %s creative matched %s outercode matched %s", len(click_items), creative_matched, outercode_matched)
    return click_items

def load_pay_items():
    """Read pay items from the CSV file named by FLAGS.pay_input.

    The first CSV row is treated as a header and skipped. Every other row
    becomes a PayItemType namedtuple, so each row must have exactly 15
    columns. For each item, taobao_report is queried by trade_id to count
    how many orders are already known; the totals are logged.

    Returns:
        List of PayItemType namedtuples in file order ([] for an empty file).
    """
    logger.info("Loading pay items")
    db = get_db_engine()
    pay_item_type = namedtuple('PayItemType', 'created name num_id shop_id shop_name count price total_price comm_rate comm tmall_rate tmall_comm total_comm status order_id')
    pay_items = []
    order_matched = 0
    # The with-block closes the CSV file even when a row fails to parse or a
    # query raises.
    with open(FLAGS.pay_input) as csv_file:
        csv_reader = csv.reader(csv_file)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)
        for line in csv_reader:
            pay_item = pay_item_type(*line)
            pay_items.append(pay_item)
            # NOTE(review): order_id comes straight from the CSV and is
            # interpolated into the SQL text; use a bound parameter if the
            # file can come from an untrusted source.
            r = db.execute("select 1 from taobao_report where trade_id=%s" % pay_item.order_id)
            if r.rowcount:
                order_matched += 1
    logger.info("Total payed %s order matched %s", len(pay_items), order_matched)
    return pay_items

if __name__ == '__main__':
    # Script entry point: call log_init for both logger names, then run
    # est_main().
    log_init(["GuangLogger","urlutils"], "sqlalchemy.*")
    est_main()
            imagemagick_resize(210, 210, big_path, mid_path)
        if not os.path.exists(sma_path):
            logger.debug("thumbing %s %s", sma_path, item)
            imagemagick_resize(60, 60, big_path, sma_path)
    except:
        logger.error("unknown error %s, %s", item, traceback.format_exc())

def crawl_page(item_id, url, headers):
    """Download ``url`` and return the response body.

    Args:
        item_id: identifier used only in log messages.
        url: address to fetch.
        headers: dict of HTTP request headers passed to urllib2.Request.

    Returns:
        The body returned by read(), or "" when the download fails with one
        of the handled errors (bad URL, HTTP error, URL error, socket
        timeout).  Those failures are logged at info level, not raised.
    """
    logger.debug("Crawling %s", url)
    data = ""
    try:
        req = urllib2.Request(url, headers=headers)
        u = urllib2.urlopen(req)
        try:
            data = u.read()
        finally:
            # Always release the connection, even if read() fails part-way.
            u.close()
    except ValueError as e:
        # Log the exception itself; BaseException.message is deprecated.
        logger.info("download %s:%s url value error %s", item_id, url, e)
    except HTTPError as e1:
        logger.info("download %s:%s failed http code: %s", item_id, url, e1.code)
    except URLError as e2:
        logger.info("download %s:%s failed url error: %s", item_id, url, e2.reason)
    except socket.timeout:
        logger.info("download %s:%s failed socket timeout", item_id, url)
    return data

# Script entry point: set up logging through log_init, then run
# process_all_items().
if __name__ == "__main__":
    log_init("ProcessItemLogger")

    process_all_items()

Beispiel #25
0
# web.py application object built from the `urls` routing table; handler
# names are resolved in this module's globals, with autoreload switched on.
app = web.application(urls, globals(), autoreload=True)

class home:
    """Request handler that renders the home page."""

    def GET(self):
        """Render home.htm with the newest matching items.

        Selects up to 40 rows from the ``item`` table, sets ``timestamp``
        and ``sign`` cookies (an HMAC over the app key and the current
        millisecond timestamp, keyed with APPSECRET), and returns the
        rendered template.
        """
        conn = web.database(
            dbn='mysql', db='guang', user='******', pw='guang',
            port=FLAGS.dbport, host=FLAGS.dbhost)
        items = conn.select(
            "item",
            what="id,num_id,detail_url,pic_url",
            where="status=1 and detail_url not like '%s.click.taobao.com%'",
            order="id desc",
            limit=40)

        # Millisecond timestamp, used both in the signature and as a cookie.
        ts_text = str(int(time.time() * 1000))
        payload = (APPSECRET + 'app_key' + str(APPKEY)
                   + "timestamp" + ts_text + APPSECRET)
        signature = hmac.HMAC(APPSECRET, payload).hexdigest().upper()

        web.setcookie('timestamp', ts_text)
        web.setcookie('sign', signature)
        return render_html("home.htm", {'items': items})

# Script entry point: define flags, initialise logging, then start the
# web.py application.
if __name__ == "__main__":
    gflags.DEFINE_boolean('webdebug', False, "is web.py debug")
    gflags.DEFINE_integer('xtaoport', 8025, "fake qq port")
    # Keep a copy of the full command line, then drop argv[1] while log_init
    # runs (presumably so its flag parsing does not see the positional
    # argument -- confirm against log_init).
    backup_args = []
    backup_args.extend(sys.argv)
    sys.argv = [sys.argv[0],] + sys.argv[2:]
    log_init('XtaoLogger', "sqlalchemy.*")
    # Restore only the program name and the first positional argument.
    sys.argv = backup_args[:2]
    web.config.debug = FLAGS.webdebug
    if len(sys.argv) == 1:
        # No positional argument: replace web.py's WSGI runner with the
        # FastCGI runner.
        web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)
    else:
        # NOTE(review): argv[1] is a string while xtaoport is declared as an
        # integer flag -- confirm gflags accepts/converts this assignment.
        FLAGS.xtaoport = sys.argv[1]
    app.run()

Beispiel #26
0
    '/', 'home',
)
def my_filter(input):
    """Identity filter: hand back the value it was given, unchanged."""
    unchanged = input
    return unchanged

# Register my_filter under the name 'my_filter' in ENV's filter table
# (ENV is defined elsewhere; presumably a template environment -- confirm).
ENV.filters['my_filter'] = my_filter

class home:
    """Placeholder request handler."""

    def GET(self):
        """Answer every GET with the fixed body 'test'."""
        body = 'test'
        return body

# Script entry point: define flags, initialise logging, optionally
# daemonize, then build and run the web.py application.
if __name__ == "__main__":
    # usage: ${prog} ip:port --daemon --stderr ...
    gflags.DEFINE_boolean('daemon', False, "is start in daemon mode?")
    gflags.DEFINE_boolean('webdebug', False, "is web.py debug")
    gflags.DEFINE_boolean('reload', False, "is web.py reload app")
    # Keep a copy of the full command line, then drop argv[1] (the ip:port
    # positional from the usage line) while log_init runs -- presumably so
    # its flag parsing does not see it; confirm against log_init.
    backup_args = []
    backup_args.extend(sys.argv)
    sys.argv = [sys.argv[0],] + sys.argv[2:]
    log_init('WebLogger', "sqlalchemy.*")
    # Restore only the program name and the first positional argument.
    sys.argv = backup_args[:2]
    web.config.debug = FLAGS.webdebug
    if len(sys.argv) == 1:
        # No positional argument: replace web.py's WSGI runner with the
        # FastCGI runner.
        web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)
    if FLAGS.daemon:
        # Daemonize with the pid file web.pid under file_path (file_path is
        # defined elsewhere in the file).
        daemon.daemonize(os.path.join(file_path, 'web.pid'))
    #render = web.template.render('templates/', base='layout')
    app = web.application(urls, globals(), autoreload=FLAGS.reload)
    app.run()

Beispiel #27
0
                    traceback.format_exc())


def crawl_page(item_id, url, headers):
    """Download ``url`` and return the response body.

    Args:
        item_id: identifier used only in log messages.
        url: address to fetch.
        headers: dict of HTTP request headers passed to urllib2.Request.

    Returns:
        The body returned by read(), or "" when the download fails with one
        of the handled errors (bad URL, HTTP error, URL error, socket
        timeout).  Those failures are logged at info level, not raised;
        HTTP 404 responses are swallowed without a log line.
    """
    logger.debug("Crawling %s", url)
    data = ""
    try:
        req = urllib2.Request(url, headers=headers)
        u = urllib2.urlopen(req)
        try:
            data = u.read()
        finally:
            # Always release the connection, even if read() fails part-way.
            u.close()
    except ValueError as e:
        # Log the exception itself; BaseException.message is deprecated.
        logger.info("download %s:%s url value error %s", item_id, url, e)
    except HTTPError as e1:
        # 404s are deliberately not logged.
        if e1.code != 404:
            logger.info("download %s:%s failed http code: %s", item_id, url,
                        e1.code)
    except URLError as e2:
        logger.info("download %s:%s failed url error: %s", item_id, url,
                    e2.reason)
    except socket.timeout:
        logger.info("download %s:%s failed socket timeout", item_id, url)
    return data


# Script entry point: set up logging through log_init, then run crawl_all().
if __name__ == "__main__":
    log_init("MeiliCrawlLogger")

    crawl_all()
Beispiel #28
0
# Command-line flags for the alert check: comparison direction, the warning
# and error thresholds, and the from/until time range of the query.
gflags.DEFINE_boolean('gt', True, "Alert if lager than/smaller than")
gflags.DEFINE_float('warnv', 0.0, "Warning thredshold")
gflags.DEFINE_float('errorv', 0.0, "Error thredshold")
gflags.DEFINE_string('since', "-1days", "From time")
gflags.DEFINE_string('until', "-", "Until time")

def check_graphite(server, target, n, warnv=0.0, errorv=0.0, gt=True, since="-1days", until="-"):
    """Fetch one Graphite series and test its latest points against thresholds.

    Args:
        server: host (and optional port) of the Graphite server.
        target: Graphite target expression, inserted into the render URL
            as-is.
        n: how many data points to select via takelastn.
        warnv: warning threshold.
        errorv: error threshold.
        gt: when true a point matches if its value is greater than the
            threshold; when false it matches if the value is not greater.
        since: value of the ``from`` query parameter.
        until: value of the ``until`` query parameter.

    Returns:
        (is_warn, is_error, lastn_datapoints): the two all_matched results
        for the warning and error thresholds, and the selected data points.

    Raises:
        IndexError: if the JSON response contains no series.
    """
    url = "http://%s/render?format=json&from=%s&until=%s&target=%s" % (server, since, until, target)
    logger.debug("Fetching %s", url)
    data = download(url)
    json_data = simplejson.loads(data)
    # Only the first series in the response is examined.
    data_points = json_data[0]['datapoints']
    # Use the caller-supplied n; this used to read the global FLAGS.lastn and
    # silently ignore the parameter.
    # NOTE(review): the predicate flags points whose value is falsy (None,
    # but also 0) -- presumably takelastn skips them; confirm.
    lastn_datapoints = list(takelastn(data_points, n, lambda x: not x[0]))
    logger.debug("Last n data point %s", lastn_datapoints)
    # not (a ^ gt) is true exactly when "value > threshold" agrees with gt.
    is_warn = all_matched(lambda x: not ((x[0] > warnv) ^ gt), lastn_datapoints)
    is_error = all_matched(lambda x: not ((x[0] > errorv) ^ gt), lastn_datapoints)
    return is_warn, is_error, lastn_datapoints

def alert_main():
    """Run check_graphite with the FLAGS settings and log at most one alert.

    An error-level match takes precedence over a warning-level match; when
    neither matches nothing is logged.
    """
    warn_hit, error_hit, points = check_graphite(
        FLAGS.server, FLAGS.target, FLAGS.lastn,
        FLAGS.warnv, FLAGS.errorv, FLAGS.gt,
        FLAGS.since, FLAGS.until)
    if error_hit:
        logger.error("Alert %s is_gt %s:%s error %s!", FLAGS.target, FLAGS.gt, FLAGS.errorv, points)
        return
    if warn_hit:
        logger.warn("Alert %s is_gt %s:%s warning %s!", FLAGS.target, FLAGS.gt, FLAGS.warnv, points)

# Script entry point: set up logging through log_init, then run alert_main().
if __name__ == "__main__":
    # usage: graphite_alert.py --pbverbose warn --use_paperboy --target xxx.xxx --warnv w --errorv e --since -1hours:%s
    log_init('AlertLogger', "sqlalchemy.*")
    alert_main()

    def test():
        """Round-trip ShopExtendInfo for shop 10000001 through the database.

        Four write/read-back stages run in order: initial creation, a
        partial update, and two full updates (the last one also covers the
        shipping remark fields).  After each save() a fresh ShopExtendInfo
        is loaded and every attribute written in that stage is compared
        with assert_equal.
        """
        log_init("CrawlLogger", "sqlalchemy.*")
        db = get_db_engine()
        shop_id = 10000001

        def round_trip(written, stored_open_at=None):
            # Assign the (attribute, value) pairs in order and save, then
            # reload the shop and check the same attributes in the same order.
            shop = ShopExtendInfo(db, shop_id)
            for attr, value in written:
                setattr(shop, attr, value)
            shop.save()

            reloaded = ShopExtendInfo(db, shop_id)
            for attr, value in written:
                if attr == "open_at":
                    # open_at is written as a datetime but checked as a date.
                    value = stored_open_at
                assert_equal(getattr(reloaded, attr), value)

        # Stage 1: create the record.
        round_trip([
            ("main_category", "服饰箱包"),
            ("location", "浙江杭州"),
            ("good_item_rate", 99.98),
            ("described_remark", 4.8),
            ("described_remark_compare", 32.13),
            ("service_remark", 4.6),
            ("service_remark_compare", -15.20),
            ("support_returnin7day", 0),
            ("support_cash", 1),
            ("support_consumer_guarantees", 1),
            ("support_credit_card", 0),
            ("open_at", datetime.datetime.strptime("2013-1-1", "%Y-%m-%d")),
            ("favorited_user_count", 1234567890),
        ], datetime.date(2013, 1, 1))

        # Stage 2: update only some of the fields.
        round_trip([
            ("main_category", "服饰箱包TEST"),
            ("location", "浙江杭州TEST"),
            ("good_item_rate", 10.98),
            ("described_remark", 3.8),
            ("described_remark_compare", -32.13),
            ("service_remark", 3.6),
        ])

        # Stage 3: update every field set in stage 1.
        round_trip([
            ("main_category", "服饰箱包Test2"),
            ("location", "浙江杭州Test2"),
            ("good_item_rate", 13.98),
            ("described_remark", 4.7),
            ("described_remark_compare", 10.13),
            ("service_remark", 4.8),
            ("service_remark_compare", -12.20),
            ("support_returnin7day", 1),
            ("support_cash", 1),
            ("support_consumer_guarantees", 0),
            ("support_credit_card", 1),
            ("open_at", datetime.datetime.strptime("2013-2-1", "%Y-%m-%d")),
            ("favorited_user_count", 1234567891),
        ], datetime.date(2013, 2, 1))

        # Stage 4: update every field again, using +/-100.00 values and
        # including the shipping remark fields.
        round_trip([
            ("main_category", "服饰箱包Test2"),
            ("location", "浙江杭州Test2"),
            ("good_item_rate", 100.00),
            ("described_remark", 4.7),
            ("described_remark_compare", 100.00),
            ("service_remark", 4.8),
            ("service_remark_compare", -100.00),
            ("shipping_remark", 5.0),
            ("shipping_remark_compare", -100.00),
            ("support_returnin7day", 1),
            ("support_cash", 1),
            ("support_consumer_guarantees", 0),
            ("support_credit_card", 1),
            ("open_at", datetime.datetime.strptime("2013-2-1", "%Y-%m-%d")),
            ("favorited_user_count", 1234567891),
        ], datetime.date(2013, 2, 1))
Beispiel #30
0
            except Exception, e:
                logger.error('*** Failed to connect to %s:%d: %r, wait and retry', self.s_host, self.s_port, e)

    logger.info('Now forwarding port %d to %s:%d ...', local_port, remote_host, remote_port)
    ForwardServer(('', local_port), SubHander).serve_forever()

def connect_forward(server_host, local_port, remote_host, remote_port, username, server_port=22, keyfile=None, password=None, look_for_keys=True):
    """Run forward_tunnel with these settings until interrupted.

    The arguments are handed to forward_tunnel in the order it expects
    (local port, remote host, remote port, server host, server port,
    username, keyfile, password, look_for_keys).  A KeyboardInterrupt
    raised while the tunnel runs is swallowed so the call returns None
    instead of propagating it.
    """
    tunnel_args = (
        local_port, remote_host, remote_port,
        server_host, server_port, username,
        keyfile, password, look_for_keys,
    )
    try:
        forward_tunnel(*tunnel_args)
    except KeyboardInterrupt:
        # Ctrl-C ends the forwarder without a traceback.
        pass

# Script entry point: define the tunnel flags, force console logging
# settings, then start forwarding via connect_forward.
if __name__ ==  "__main__":
    import gflags
    from pygaga.helpers.logger import log_init
    FLAGS = gflags.FLAGS

    # Local/remote endpoints of the forward and the SSH server to go through.
    gflags.DEFINE_integer('lport', 3306, "local port")
    gflags.DEFINE_integer('rport', 3306, "remote port")
    gflags.DEFINE_string('rhost', '192.168.10.42', "remote host")
    gflags.DEFINE_string('shost', 'log.j.cn', "server host")
    gflags.DEFINE_integer('sport', 22, "server port")
    gflags.DEFINE_string('user', 'chuansheng.song', "server username")

    # These three flags are not defined in this block; presumably they are
    # registered by pygaga.helpers.logger on import -- confirm.
    FLAGS.stderr = True
    FLAGS.verbose = "info"
    FLAGS.color = True
    log_init("sshforward", "sqlalchemy.*")
    connect_forward(FLAGS.shost, FLAGS.lport, FLAGS.rhost, FLAGS.rport, FLAGS.user, FLAGS.sport)

Beispiel #31
0
                            % item_id)
                    else:
                        price = float(item['resp']['price'])
                        #title = item['resp']['title']
                        #pic_url = item['resp']['pic_url']
                        if abs(item_price -
                               price) / (item_price + 0.0000001) > 0.2 or abs(
                                   item_price - price) > 2.0:
                            change_counter += 1
                            logger.debug("Item %s/%s %s %s price %s -> %s",
                                         counter, total, item_id, item_iid,
                                         item_price, price)
                            if FLAGS.commit_price:
                                write_db.execute(
                                    "update item set price=%s where id=%s" %
                                    (price, item_id))
                logger.debug("req %s resp %s", item['req'], item['resp'])
            except:
                logger.error("update failed %s", traceback.format_exc())
    logger.info(
        "Taobao quickupdate, total %s, off %s, price change %s, volume change %s",
        total, off_counter, change_counter, vol_change_counter)


# Script entry point: set up logging through log_init, then run crawl_main().
if __name__ == "__main__":
    log_init([
        'CrawlLogger',
        'TaobaoLogger',
    ], "sqlalchemy.*")
    crawl_main()
Beispiel #32
0
import logging
from pygaga.helpers.logger import log_init
from pygaga.helpers.dateutils import tomorrow

import gflags

# Shared gflags registry used to read command-line flag values.
FLAGS = gflags.FLAGS

# Module-level logger; the same name is passed to log_init below.
logger = logging.getLogger('TestLogger')

# Script entry point: initialise logging, print three date-related flags and
# emit one message at each of four log levels.
if __name__ == "__main__":
    log_init("TestLogger", "sqlalchemy.*")
    # start/end/date are not defined in this snippet; presumably registered
    # by one of the imported pygaga helpers -- confirm.
    print "%s %s %s" % (FLAGS.start, FLAGS.end, FLAGS.date)
    logger.debug("debug")
    logger.warn("warn")
    logger.info("info")
    logger.error("error")
Beispiel #33
0
#!/usr/bin/env python
# coding: utf-8

import os
import sys

import daemon
import gflags
import logging

from pygaga.helpers.logger import log_init
from pygaga.helpers.dbutils import get_db_engine

# Module-level logger; the same name is passed to log_init in the entry block.
logger = logging.getLogger("AppLogger")

# Shared gflags registry used to read command-line flag values.
FLAGS = gflags.FLAGS


def main():
    """Application body; currently an empty placeholder that returns None."""
    return None


# Script entry point: define the daemon flag, set up logging, run main().
# The daemonize step is currently commented out, so the flag has no effect
# in this block.
if __name__ == "__main__":
    # usage: ${prog} ip:port --daemon --stderr ...
    gflags.DEFINE_boolean("daemon", False, "is start in daemon mode?")
    log_init("AppLogger", "sqlalchemy.*")
    # if FLAGS.daemon:
    #    file_path = os.path.split(os.path.abspath(__file__))[0]
    #    daemon.daemonize(os.path.join(file_path, 'app.pid'))
    main()
Beispiel #34
0
def mul(x, y):
    '''
    Return the product of x and y.

    >>> mul(2, 4)
    8
    '''
    product = x * y
    return product

class addcls:
    """
    Accumulator whose ``+`` operator adds to the stored value in place.

    The addition itself evaluates to None, so it prints nothing below.

    >>> x = addcls(5)
    >>> x + 3
    >>> x.x
    8
    """

    def __init__(self, x):
        """Store the starting value in ``self.x``."""
        self.x = x

    def __add__(self, y):
        """Add ``y`` to ``self.x``; no value is returned."""
        self.x += y
        return None

# Script entry point: initialise logging, then run this module's doctests
# with verbose output.
if __name__ == "__main__":
    import doctest
    from pygaga.helpers.logger import log_init
    log_init()
    doctest.testmod(verbose=True)
Beispiel #35
0
if __name__ == "__main__":
    gflags.DEFINE_string('cookie', "/Users/chris/tmp/qqtest/cookies.txt", "cookie path", short_name="k")
    gflags.DEFINE_string('photo', "/Users/chris/tmp/1.jpg", "photo path", short_name="p")
    gflags.DEFINE_string('content', "", "post content", short_name="c")
    gflags.DEFINE_boolean('daemon', False, "run as daemon")
    gflags.DEFINE_boolean('fromdb', True, "post content from db")
    gflags.DEFINE_boolean('dryrun', False, "dry run, not post and update db")
    gflags.DEFINE_boolean('commitfail', True, "is commit status to database when failed")
    gflags.DEFINE_boolean('loop', False, "is loop forever?")
    gflags.DEFINE_boolean('timer', False, "is use timer post?")
    gflags.DEFINE_boolean('test', False, "is test mode? not post, just check")
    gflags.DEFINE_integer('sid', -1, "post one user from db")
    gflags.DEFINE_integer('interval', 20, "sleep seconds between post")
    gflags.DEFINE_integer('postinterval', 0, "sleep seconds between post")
    log_init('QzoneLogger', "sqlalchemy.*")
    #log_init('QzoneLogger', "")
    if FLAGS.daemon:
        if not FLAGS.pidfile:
            pidfile = os.path.join(os.path.split(os.path.abspath(__file__))[0], 'post.pid')
        else:
            pidfile = FLAGS.pidfile
        daemon.daemonize(pidfile)
    if not FLAGS.fromdb:
        result = post_shuoshuo(FLAGS.cookie, FLAGS.photo, FLAGS.content)
        if result:
            logger.info("Uploading content success")
            sys.exit(0)
        else:
            sys.exit(1)
    else:
                    logger.debug("process %s %s/%s -> %s", shop_id, pos, total, sql)
                    if not FLAGS.dryrun:
                        db.execute(sql.replace('%', '%%'))
                        db.execute("delete from tbk_item_convert where item_id=%s" % id)
                except KeyboardInterrupt:
                    raise
                except Exception, e:
                    logger.debug("in %s out %s" % (numid2id, result))
                    logger.warn("convert failed %s %s" % (sql, traceback.format_exc()))
        except KeyboardInterrupt:
            raise
        except:
            logger.warn("process failed %s %s reason %s" % (input, output, traceback.format_exc()))
    logger.info("Convert result %s - %s", converted, total)

# Script entry point: initialise logging, then dispatch on FLAGS.action.
if __name__ == "__main__":
    log_init(['TaobaokeLogger', 'TaobaoLogger'], "sqlalchemy.*")

    # 'remove' and 'update' share one shape: run for every shop via do_all
    # when FLAGS.all is set, otherwise for the single shop in FLAGS.shop.
    per_shop_handlers = {
        'remove': rollback_shop,
        'update': update_shop,
    }
    requested = FLAGS.action
    if requested in per_shop_handlers:
        handler = per_shop_handlers[requested]
        if FLAGS.all:
            do_all(handler)
        else:
            handler(FLAGS.shop, None)
    elif requested == 'vip':
        update_vip_shop(FLAGS.shop)

Beispiel #37
0
import web
import traceback

from pygaga.helpers.logger import log_init

from guang_crawler.view import app

# Shared gflags registry plus this script's flags: server mode switches, the
# argument string handed on to web.py, and the image path.
FLAGS = gflags.FLAGS
gflags.DEFINE_boolean('server', True, "is run as standard server")
gflags.DEFINE_boolean('fcgi', False, "is run as fcgi server")
gflags.DEFINE_string('args', "0.0.0.0:8765", "wsgi args")
gflags.DEFINE_string('crawl_path', "/space/crawler/image_crawler/static", "image path")

# Script entry point: parse flags, initialise logging, then run the web.py
# application with FLAGS.args as its only command-line argument.
if __name__ == "__main__":
    try:
        argv = FLAGS(sys.argv)[1:]  # parse flags
    except gflags.FlagsError as e:
        # Real newlines here: the doubled backslashes used to print a literal
        # "\n" instead of breaking the usage message into lines.
        print('%s\nUsage: %s ARGS\n%s' % (e, sys.argv[0], FLAGS))
        sys.exit(1)

    log_init()

    # Rebuild argv as [program, FLAGS.args] so that the flag value is the
    # only argument left on the command line when app.run() starts.
    newargv = []
    newargv.append(sys.argv[0])
    newargv.append(FLAGS.args)
    sys.argv = newargv
    if FLAGS.fcgi:
        # Replace web.py's WSGI runner with the FastCGI runner.
        web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)
    app.run()

Beispiel #38
0
                    loan_obj.repayment = str(
                        loan.xpath("td[5]/text()")[0].encode("UTF-8")).strip()
                    if loan.xpath("td[7]/div/a"):
                        loan_obj.schedule = str(
                            loan.xpath("td[7]/div/a/text()")[0].encode(
                                "UTF-8")).strip().replace("%", "")
                    else:
                        loan_obj.schedule = "0"

                    loan_obj.db_create(db)

        logger.info("company %s crawler loan: new size %s, update size %s",
                    company_id, len(new_ids_set), len(update_ids_set))

        # db - 新抓取的 = 就是要下线的
        off_ids_set = db_ids_set - online_ids_set
        if off_ids_set:
            loan_obj = Loan(company_id)
            loan_obj.db_offline(db, off_ids_set)
            logger.info("company %s crawler loan: offline %s", company_id,
                        len(off_ids_set))

    except:
        logger.error("url: %s xpath failed:%s", url, traceback.format_exc())


# Script entry point: set up logging through log_init, then run crawl().
if __name__ == "__main__":
    log_init("CrawlLogger", "sqlalchemy.*")

    crawl()
Beispiel #39
0
    logger.debug("Fetching %s", url)
    data = download(url)
    json_data = simplejson.loads(data)
    data_points = json_data[0]['datapoints']
    lastn_datapoints = list(
        takelastn(data_points, FLAGS.lastn, lambda x: not x[0]))
    logger.debug("Last n data point %s", lastn_datapoints)
    is_warn = all_matched(lambda x: not ((x[0] > warnv) ^ gt),
                          lastn_datapoints)
    is_error = all_matched(lambda x: not ((x[0] > errorv) ^ gt),
                           lastn_datapoints)
    return is_warn, is_error, lastn_datapoints


def alert_main():
    """Run check_graphite with the FLAGS settings and log at most one alert.

    An error-level match takes precedence over a warning-level match; when
    neither matches nothing is logged.
    """
    warn_hit, error_hit, points = check_graphite(
        FLAGS.server, FLAGS.target, FLAGS.lastn,
        FLAGS.warnv, FLAGS.errorv, FLAGS.gt,
        FLAGS.since, FLAGS.until)
    if error_hit:
        logger.error("Alert %s is_gt %s:%s error %s!", FLAGS.target,
                     FLAGS.gt, FLAGS.errorv, points)
        return
    if warn_hit:
        logger.warn("Alert %s is_gt %s:%s warning %s!", FLAGS.target,
                    FLAGS.gt, FLAGS.warnv, points)


# Script entry point: set up logging through log_init, then run alert_main().
if __name__ == "__main__":
    # usage: graphite_alert.py --pbverbose warn --use_paperboy --target xxx.xxx --warnv w --errorv e --since -1hours:%s
    log_init('AlertLogger', "sqlalchemy.*")
    alert_main()