def insertProduct(cursor, quota=1, ignore=-1, _sqliteName=None):
    sqliteName = "store.sqlite"
    if _sqliteName != None:
        sqliteName = _sqliteName

    db = sqlite3.connect(sqliteName)
    allProducts = db.execute('select product_id, product_title, product_intro, product_cover_img, product_thumbnail from products_info').fetchall()

    count = 0
    ignore_count = 0

    for product in allProducts:
        ignore_count += 1
        if ignore_count <= ignore:
            continue

        # _id, title, introtext, thumbnails = news
        product_id, product_title, product_intro, product_cover_img, product_thumbnail = product
        # print product_id, product_title, product_intro, product_cover_img, product_thumbnail

        thumbnails = None
        if product_cover_img != None and len(product_cover_img.strip()) > 0:
            thumbnails = product_cover_img
        else:
            try:
                thumbnails = eval(product_thumbnail)[0]
            except Exception, e:
                pass

        if thumbnails == None:
            print "未找到图片,跳过", product_id
            continue

        # print "thumbnails", thumbnails
            

        # images是 erji_tz_portfolio_xref_content需要
        images = downloadNewsThumbnails(product_id, thumbnails)
        print "insert images", images
        if images == None:
            print "下载", thumbnails, "失败!"
            continue

        asset_id = insertIntoAssets(cursor, product_title)


        # 获取full text
        product_intro, full_text = db.execute('select product_intro, product_detail from products_view where product_id='+str(product_id)).fetchone()
        content_id = insertIntoContent(cursor, asset_id, product_title, product_intro, full_text, 14)

        if content_id <= 0:
            continue

        insert_xref_content(cursor, content_id, images)

        # 插入tags
        _detail = product_title + " " + product_intro + " " + full_text
        Tags.parserTags(_detail, cursor, content_id)

        count += 1
        if count >= quota:
            break
Beispiel #2
0
                http_pattern = r'http\:\/\/[\s\S]*?\"'
                pattern = re.compile(http_pattern)
                match = re.search(http_pattern, buy_url_detail)
                if match:
                    buy_url = match.group().strip()
        except Exception, e:
            print "get buy url error:", e

        if db:
            CREATE_PRODUCT_VIEW_TABLE = 'CREATE TABLE IF NOT EXISTS "products_view" ("product_id" INTEGER PRIMARY KEY  NOT NULL  UNIQUE, "product_intro" TEXT, "product_detail" TEXT, "product_thumbnails" TEXT, "buy_url" TEXT, "tags" TEXT )'
            try:
                db.execute(CREATE_PRODUCT_VIEW_TABLE)

                INSERT_COMMAND = "insert into products_view values (?,?,?,?,?,?)"
                _detail = self.product_title + " " + _product_intro +" " +product_detail
                db.execute(INSERT_COMMAND, (self.product_id, _product_intro, product_detail, str(product_thumbnails), buy_url, str(Tags.parserTags(_detail, None, self.product_id))))
                # print _product_intro
                # db.commit()
            except Exception, e:
                print "insert product view error:", e

    def toTuple(self):
        return (
            self.product_id, 
            self.product_name, 
            self.product_title, 
            self.product_intro, 
            self.comment_count,
            self.like_count,
            self.product_cover_img,
            self.eval_num,