コード例 #1
0
ファイル: neeq_company_parser_2.py プロジェクト: yujiye/Codes
def get_android_domain(app_market, app_id):
    """Return the domain identifier to store for an Android app link.

    For markets 16010 and 16020 the app is looked up through
    ``parser_db_util.find_android_market`` and its ``"apkname"`` is returned,
    or ``None`` when the lookup yields nothing. For every other market the
    ``app_id`` is returned unchanged.
    """
    # Guard clause: only the two listed markets need a database lookup.
    if app_market not in (16010, 16020):
        return app_id

    market_record = parser_db_util.find_android_market(app_market, app_id)
    if not market_record:
        return None
    return market_record["apkname"]
コード例 #2
0
def parse_company(item):
    """Build a source-company dict from one crawled company record.

    Reads the company key from ``item["postdata"]["id"]``, the profile from
    ``item["data"]["basic"]`` and optional product links from
    ``item["data"]["productinfos"]``. ``item["url"]`` is read when a link is
    classified as type 4010.

    Returns:
        ``{"status": "No_Name"}`` when the product name or the normalized
        full company name is missing; otherwise a dict of company fields
        whose ``"artifacts"`` entry lists one dict per usable link.
    """
    logger.info("parse_company")
    company_key = item["postdata"]["id"]

    # Company basic info.
    c = item["data"]["basic"]

    tags_str = c["tags"].replace("|", ",")

    logo = c["icon"]
    if logo.find("product_default.png") >= 0:
        # Presumably the site's default image rather than a real logo.
        logo = None

    establish_date = None
    if "open_time" in c:
        try:
            establish_date = datetime.datetime.strptime(c["open_time"], "%Y-%m-%d")
        except (TypeError, ValueError):
            # Missing or malformed date: leave establish_date unset.
            pass

    province = c.get("province")
    city = c.get("city")

    # Resolve the location from the city first, then fall back to the
    # province while no non-zero location id has been found.
    location_id = 0
    for address in (city, province):
        if location_id != 0:
            break
        if address is None or address.strip() == "":
            continue
        location = parser_db_util.get_location(address)
        if location is not None:
            location_id = location["locationId"]

    fullName = c["company"]
    if fullName is None or fullName.strip() == "":
        fullName = None
    else:
        fullName = fullName.replace("_", "")
        # Drop anything that follows the last occurrence of the suffix.
        idx = fullName.rfind(u"公司")
        if idx != -1:
            fullName = fullName[:(idx + len(u"公司"))]
        fullName = name_helper.company_name_normalize(fullName)

    name = c["product"]
    desc = c["desc"].strip() if "desc" in c else ""
    brief = c["yewu"].strip() if "yewu" in c else ""

    if name is None or fullName is None:
        return {
            "status": "No_Name",
        }

    # Collect candidate links in priority order, skipping missing, blank and
    # already-seen values.
    candidate_links = [c.get("gw_link"), c.get("source_gw_link")]
    for pi in item["data"].get("productinfos") or []:
        candidate_links.append(pi.get("link"))

    websites = []
    for link in candidate_links:
        if link is not None and link.strip() != "" and link not in websites:
            websites.append(link)

    artifacts = []
    for website in websites:
        market_type, app_market, app_id = url_helper.get_market(website)

        if market_type == 4010:
            # Skip the crawled page itself and links back to the source site.
            if item["url"] == website or website.find("qimingpian.com") != -1:
                continue
            flag, domain = url_helper.get_domain(website)
            if flag is None:
                continue
            if flag is False:
                domain = None
        elif market_type == 4040:
            domain = app_id
            if domain is None:
                continue
        elif market_type == 4050:
            if app_market == 16010 or app_market == 16020:
                android_app = parser_db_util.find_android_market(app_market, app_id)
                domain = android_app["apkname"] if android_app else None
            else:
                domain = app_id
            if domain is None:
                continue
        else:
            # Types 4020/4030 never had a domain in the original code, so no
            # artifact was ever produced for them; other types are ignored.
            continue

        artifacts.append({
            "type": market_type,
            "name": name,
            "description": brief,
            "link": website,
            "domain": domain
        })

    return {
        "name": name,
        "fullName": fullName,
        "description": desc,
        "productDesc": None,
        "modelDesc": None,
        "operationDesc": None,
        "teamDesc": None,
        "marketDesc": None,
        "compititorDesc": None,
        "advantageDesc": None,
        "planDesc": None,
        "otherDesc": None,
        "brief": brief,
        "round": 0,
        "roundDesc": None,
        "companyStatus": 2010,
        'fundingType': 0,
        "locationId": location_id,
        "address": None,
        "phone": None,
        "establishDate": establish_date,
        "logo": logo,
        "source": SOURCE,
        "sourceId": company_key,
        "field": None,
        "subField": None,
        "tags": tags_str,
        "headCountMin": None,
        "headCountMax": None,
        "artifacts": artifacts,
    }
コード例 #3
0
ファイル: chuangyepu_company.py プロジェクト: yujiye/Codes
def parse_company(item):
    """Parse one crawled company page into a source-company dict.

    Reads ``item["key"]`` and ``item["content"]`` (the page HTML);
    ``item["url"]`` is read when the company link is classified as type 4010.

    Returns:
        ``{"status": "No_Name"}`` when no company name is found,
        ``{"status": "No_Data"}`` when the section headings do not line up
        with the content blocks, otherwise a dict of company fields with
        ``"artifacts"``, ``"fundings"`` and ``"status": 1``.
    """
    logger.info("*** base ***")
    company_key = item["key"]
    html = item["content"]
    logger.info(company_key)
    d = pq(html)

    logo = d('.logo-block > img').attr('src')
    if logo == 'http://assets3.chuangyepu.com/chuangyepu/images/big-screenshot.png':
        logo = None

    basic_info = pq(d('div.col-md-9> div> table> tr> td').eq(1))
    name = basic_info('div.name').text().strip()
    brief = basic_info('div.desc').eq(0).text().strip()
    # ``name`` comes out of ``.strip()`` and is therefore never None; test for
    # emptiness so that the No_Name guard can actually fire.
    if not name:
        return {
            "status": "No_Name",
        }

    try:
        website = basic_info('div.desc').eq(1)('a').text().strip()
    except Exception:
        # Best effort: a page without a usable link simply has no website.
        website = None

    main_blocks = d('div.col-md-9> div.col-sm-12')
    h4s = d('div.col-md-9> h4')
    logger.info("main: %d, h4: %d", len(main_blocks), len(h4s))

    # Each heading is paired with the content block one position after it,
    # so there must be exactly one more block than there are headings.
    if len(h4s) != len(main_blocks) - 1:
        return {
            "status": "No_Data",
        }

    desc = None
    latest_round = None
    latest_round_desc = None
    source_fundings = []

    for i in range(len(h4s)):
        heading = h4s.eq(i).text().strip()
        section = main_blocks.eq(i + 1)

        if heading == "产品介绍":
            desc = section('div.content> div> p.desc').text().strip()
        elif heading == "融资历史":
            for row_element in section('table> tr'):
                row = pq(row_element)
                # Rows containing this text are skipped (presumably the
                # table header).
                if row.text().find("时间") >= 0:
                    continue

                # The page text is combined with "/01" to form a full date.
                date = row('td.investment_date> span').text().strip() + "/01"
                try:
                    fundingDate = datetime.datetime.strptime(date, '%Y/%m/%d')
                except (TypeError, ValueError):
                    fundingDate = None

                roundStr = row('td.investment-round').text().strip()
                fundingRound, roundStr = chuangyepu_helper.getFundingRound(
                    roundStr)

                moneyStr = row('td.money').text().strip()
                (currency, investment,
                 precise) = chuangyepu_helper.getMoney(moneyStr)

                investors = []
                for anchor in row('td').eq(3)('p> a'):
                    iv = pq(anchor)
                    investor_url = iv.attr("href")
                    investor_name = iv.text().strip()
                    # Only links whose URL contains "institutions" are kept.
                    if (investor_name is not None and investor_url is not None
                            and investor_url != ""
                            and investor_url.find("institutions") >= 0):
                        investor_key = investor_url.strip().split("/")[-1]
                        investors.append({
                            "name": investor_name,
                            "key": investor_key
                        })

                source_fundings.append({
                    "investment": investment,
                    "precise": precise,
                    "round": fundingRound,
                    "roundDesc": roundStr,
                    "currency": currency,
                    "fundingDate": fundingDate,
                    "investors": investors
                })

                # Track the highest funding round seen so far.
                if latest_round is None or latest_round < fundingRound:
                    latest_round = fundingRound
                    latest_round_desc = roundStr
        # Other sections (team members, media reports) are not parsed.

    if desc is None:
        desc = brief

    if brief is not None and len(brief.decode('utf-8')) > 200:
        brief = None

    artifacts = []
    market_type, app_market, app_id = url_helper.get_market(website)
    if market_type == 4010:
        if item["url"] != website:
            flag, domain = url_helper.get_domain(website)
            if flag is not None:
                if flag is False:
                    domain = None
                artifacts.append({
                    "type": 4010,
                    "name": name,
                    "description": desc,
                    "link": website,
                    "domain": domain
                })
    elif market_type == 4040 or market_type == 4050:
        if market_type == 4050 and (app_market == 16010 or app_market == 16020):
            android_app = parser_db_util.find_android_market(
                app_market, app_id)
            domain = android_app["apkname"] if android_app else None
        else:
            domain = app_id
        if domain is not None:
            artifacts.append({
                "type": market_type,
                "name": name,
                "description": desc,
                "link": website,
                "domain": domain
            })

    source_company = {
        "name": name,
        "fullName": None,
        "description": desc,
        "productDesc": None,
        "modelDesc": None,
        "operationDesc": None,
        "teamDesc": None,
        "marketDesc": None,
        "compititorDesc": None,
        "advantageDesc": None,
        "planDesc": None,
        "brief": brief,
        "round": latest_round,
        "roundDesc": latest_round_desc,
        "companyStatus": 2010,
        'fundingType': 0,
        "locationId": None,
        "address": None,
        "phone": None,
        "establishDate": None,
        "logo": logo,
        "source": SOURCE,
        "sourceId": company_key,
        "field": None,
        "subField": None,
        "tags": None,
        "headCountMin": None,
        "headCountMax": None,
        "artifacts": artifacts,
        "fundings": source_fundings,
        "status": 1
    }

    return source_company
コード例 #4
0
def parse_company(item):
    """Parse one crawled company page into a source-company dict.

    Reads ``item["key"]`` and ``item["content"]`` (the page HTML);
    ``item["url"]`` is read when the company link is classified as type 4010.

    Returns:
        ``None`` when ``item`` is ``None``; ``{"status": "No_Name"}`` when
        the page is a placeholder, has no usable name, or the full name
        fails ``name_helper.name_check``; otherwise a dict of company fields
        with ``"artifacts"`` and ``"status": 1``.
    """
    if item is None:
        return None

    logger.info("*** base ***")
    company_key = item["key"]
    html = item["content"]
    logger.info(company_key)
    d = pq(html)

    # The logo id is processed in parser_db_util, not here.

    if html.decode("utf-8").find("这个公司的主页还在建设") >= 0:
        return {
            "status": "No_Name",
        }

    title_link = d('.company_main > h1 > a')
    name = title_link.text()
    link = title_link.attr('href')
    fullName = name_helper.company_name_normalize(title_link.attr('title'))
    if name is None or fullName is None or name.find("拉勾") >= 0:
        return {
            "status": "No_Name",
        }
    # Prefer the full name when the displayed name is longer than it or blank.
    if len(name) > len(fullName) or name.strip() == "":
        name = fullName

    chinese, companycheck = name_helper.name_check(fullName)
    if companycheck is not True:
        return {
            "status": "No_Name",
        }

    logo = d('.top_info_wrap > img').attr('src')
    if logo is not None:
        if logo.find("logo_default") >= 0:
            logo = None
        elif not logo.startswith("http"):
            # Give scheme-less URLs an explicit scheme.
            logo = "http:" + logo

    brief = d('.company_word').text()
    desc_text = d('.company_intro_text').text()

    desc = None
    if u"该公司尚未添加公司介绍" not in desc_text and len(desc_text) >= 10:
        desc_html = d('.company_intro_text > .company_content').html()
        # ``.html()`` yields None when the inner container is missing.
        if desc_html is not None:
            desc_html = desc_html.replace(
                '<span class="text_over">展开</span>', '')
            desc = BeautifulSoup(desc_html, "lxml").getText()

    field = ''
    stage = ''
    headCount = ''
    location = ''
    address = ''
    try:
        field = d(
            '#basic_container > .item_content >ul > li:eq(0) > span').text()
        stage = d(
            '#basic_container > .item_content >ul > li:eq(1) > span').text()
        headCount = d(
            '#basic_container > .item_content >ul > li:eq(2) > span').text()
        location = d(
            '#basic_container > .item_content >ul > li:eq(3) > span').text()
        address = d('.con_mlist_ul > li:eq(0) > p:eq(1)').text()
    except Exception:
        # Best effort: keep whatever was extracted before the failure.
        pass

    # Trim the head count outside the try block: a value without the marker
    # used to raise there and silently skip the location/address lookups.
    headCount = headCount or ''
    marker_pos = headCount.find(u'人')
    if marker_pos >= 0:
        headCount = headCount[:marker_pos]
    headCount = headCount.replace("people", "").strip()

    min_staff = None
    max_staff = None
    if headCount == "少于15":
        min_staff = 1
        max_staff = 15
    else:
        # Both bounds are converted to int so that ranges and single values
        # produce the same type; unparseable bounds become None.
        bounds = [part.strip() for part in headCount.split('-')]
        try:
            min_staff = int(bounds[0])
        except ValueError:
            min_staff = None
        if len(bounds) > 1:
            try:
                max_staff = int(bounds[1])
            except ValueError:
                max_staff = None

    # NOTE(review): the mapped ``stage`` code is not part of the returned
    # dict ("round" is None); only ``funding_type`` is used. Confirm whether
    # "round" was meant to carry it.
    funding_type = 0
    if stage == '不需要融资':
        stage = 0
        funding_type = 8010
    elif stage == '未融资':
        stage = 0
    elif stage == '天使轮':
        stage = 1010
    elif stage == 'A轮':
        stage = 1030
    elif stage == 'B轮':
        stage = 1040
    elif stage == 'C轮':
        stage = 1050
    elif stage == 'D轮及以上':
        stage = 1060
    elif stage == '上市公司':
        stage = 1110
    else:
        stage = 0

    location_id = 0
    location_new = parser_db_util.get_location(location)
    if location_new is not None:
        location_id = location_new["locationId"]

    website = url_helper.url_normalize(link)
    logger.info("website: %s" % website)

    artifacts = []
    market_type, app_market, app_id = url_helper.get_market(website)
    if market_type == 4010:
        # Skip the crawled page itself and links back to the source site.
        if item["url"] != website and website.find("lagou.com") == -1:
            flag, domain = url_helper.get_domain(website)
            if flag is not None:
                if flag is False:
                    domain = None
                artifacts.append({
                    "type": 4010,
                    "name": name,
                    "description": desc,
                    "link": website,
                    "domain": domain
                })
    elif market_type == 4040 or market_type == 4050:
        if market_type == 4050 and (app_market == 16010 or app_market == 16020):
            android_app = parser_db_util.find_android_market(
                app_market, app_id)
            domain = android_app["apkname"] if android_app else None
        else:
            domain = app_id
        if domain is not None:
            artifacts.append({
                "type": market_type,
                "name": name,
                "description": desc,
                "link": website,
                "domain": domain
            })
    # Types 4020/4030 never had a domain in the original code, so no artifact
    # was ever produced for them.

    source_company = {
        "name": name,
        "fullName": fullName if fullName.strip() != "" else None,
        "description": desc,
        "productDesc": None,
        "modelDesc": None,
        "operationDesc": None,
        "teamDesc": None,
        "marketDesc": None,
        "compititorDesc": None,
        "advantageDesc": None,
        "planDesc": None,
        "brief": brief,
        "round": None,
        "roundDesc": None,
        "companyStatus": 2010,
        'fundingType': funding_type,
        "locationId": location_id,
        "address": address,
        "phone": None,
        "establishDate": None,
        "logo": logo,
        "source": SOURCE,
        "sourceId": company_key,
        "field": field,
        "subField": None,
        "tags": None,
        "headCountMin": min_staff,
        "headCountMax": max_staff,
        "artifacts": artifacts,
        "status": 1
    }

    return source_company
コード例 #5
0
ファイル: itjuzi_company_parser.py プロジェクト: yujiye/Codes
def parse_base(item):
    """Parse the base company information out of one crawled page.

    Reads ``item["key"]`` and ``item["content"]`` (the page HTML).

    Returns:
        ``None`` when ``item`` is ``None`` or when neither a short name nor a
        full company name can be found; otherwise a dict with the company
        names, dates, location, funding state and an ``"artifacts"`` list
        with one entry per distinct link found on the page.
    """
    if item is None:
        return None

    logger.info("*** base ***")
    company_key = item["key"]
    html = item["content"]
    d = pq(html)

    # The title holds "product / short name"; nested tags are removed from a
    # clone so that only the element's own text remains.
    company_short_name = ""
    product_name = d('div.line-title> span> h1').clone().children().remove().end().text().strip()
    if product_name == "":
        product_name = d('div.line-title> span> b').clone().children().remove().end().text().strip()
    temps = product_name.split("/", 1)
    if len(temps) == 2:
        product_name = temps[0].strip()
        company_short_name = temps[1].strip()
    if company_short_name == "":
        company_short_name = product_name
    logger.info("product name: %s" % product_name)
    logger.info("company short name: %s" % company_short_name)

    company_name = d('div.des-more> div').eq(0).text().strip().replace("公司全称:","")
    if company_name == "暂无" or company_name == "暂未收录":
        company_name = ""

    if company_name.strip() == "":
        try:
            company_name = d('div.des-more> h2').text().strip()
        except Exception:
            # Best effort: keep the empty name when the fallback is missing.
            pass
    if company_name == "暂无" or company_name == "暂未收录":
        company_name = ""

    company_name = name_helper.company_name_normalize(company_name)
    logger.info("company name: %s" % company_name)

    if company_short_name == "" and company_name == "":
        return

    establish_date = None
    established_text = d('div.des-more> div').eq(1).text().strip().replace("成立时间:","")
    result = util.re_get_result(r'(\d*)\.(\d*)', established_text)
    if result is not None:
        (year, month) = result
        # Any month that is not a valid 1..12 number falls back to January.
        try:
            if not 1 <= int(month) <= 12:
                month = "1"
        except (TypeError, ValueError):
            month = "1"
        try:
            establish_date = datetime.datetime.strptime("%s-%s-1" % (year, month), '%Y-%m-%d')
        except ValueError:
            # For example an empty year: leave the date unset instead of
            # aborting the whole parse.
            establish_date = None
    logger.info("establish date: %s" % establish_date)

    locationId = 0
    location_text = d('span.loca').text().strip()
    result = util.re_get_result(u'(.*?)·(.*?)$', location_text)
    if result is not None:
        (province, city) = result
        province = province.strip()
        city = city.strip()
        logger.info("location: %s-%s" % (province, city))

        # Prefer the city; fall back to the province when it is unknown.
        for place in (city, province):
            found = parser_db_util.get_location(place)
            if found is not None:
                locationId = found["locationId"]
                break

    if locationId == 0:
        loc1, loc2 = name_helper.get_location_from_company_name(company_name)
        if loc1 is not None:
            found = parser_db_util.get_location(loc1)
            if found is not None:
                locationId = found["locationId"]
    logger.info("locationId: %d" % locationId)

    company_status = 2010
    status_text = d('div.des-more> div').eq(2).text().strip()
    if status_text == "已关闭":
        company_status = 2020
    logger.info("company_status: %d" % company_status)

    # NOTE(review): both phrases map to 8020 in the original code; confirm
    # that the second one was not meant to use a different code.
    funding_type = 0
    funding_text = d("span.tag.bg-c").text().strip()
    logger.info("融资需求: %s" % funding_text)
    if funding_text == "融资需求 · 需要融资":
        funding_type = 8020
    elif funding_text == "融资需求 · 寻求收购":
        funding_type = 8020
    logger.info("funding_type=%d" % funding_type)

    try:
        brief = d("h2.seo-slogan").text().strip()
    except Exception:
        brief = ""
    logger.info("brief: %s" % brief)

    # ``find`` returns -1 (truthy) when the text is absent, so the original
    # bare ``if brief.find(...)`` wiped almost every brief. Clear it only
    # when the placeholder text is really present.
    if brief.find("暂未收录") >= 0:
        brief = ""

    field = d("span.scope.c-gray-aset> a").eq(0).text().strip()
    logger.info("field: %s" % field)

    sub_field = d("span.scope.c-gray-aset> a").eq(1).text().strip()
    logger.info("sub field: %s" % sub_field)

    tags = d("div.tagset.dbi.c-gray-aset> a >span").text().strip().replace(" ",",")
    logger.info("tags: %s" % tags)

    desc = d("div.des,div.desc,div.introduction,div.abstract,div.summary").text().\
        replace("购买数据请联系","").replace('*****@*****.**',"").replace("itjuzi是一家数据服务公司","").strip()
    logger.info("********desc: %s" % desc)

    logo = d("div.pic >img").attr("src")
    logger.info("logo: %s", logo)

    # Collect artifacts from every link element. Both the href and the
    # visible text of an element are tried; each distinct normalized link is
    # processed once (the original ran one selector twice and emitted
    # duplicate artifacts).
    artifacts = []
    seen_websites = set()
    link_selectors = (
        'div.link-line> a',
        'div.link-line> span.weblink,span.webTink> a',
    )
    for selector in link_selectors:
        for wa in d(selector):
            node = pq(wa)
            for raw_link in (node.attr("href"), node.text()):
                if raw_link is None:
                    continue
                website = raw_link.strip()
                # Known placeholder values are treated as empty links.
                if website == "http://%e6%9a%82%e6%97%a0" or website == "http://tt":
                    website = ""
                try:
                    website = url_helper.url_normalize(website)
                except Exception:
                    # Best effort: skip links that cannot be normalized.
                    continue
                logger.info("website: %s" % website)
                if website in seen_websites:
                    continue
                seen_websites.add(website)

                market_type, app_market, app_id = url_helper.get_market(website)
                if market_type == 4010:
                    flag, domain = url_helper.get_domain(website)
                    if flag is not None:
                        if flag is False:
                            domain = None
                        artifacts.append({
                            "type": 4010,
                            "name": product_name,
                            "desc": desc,
                            "link": website,
                            "domain": domain
                        })
                elif market_type == 4020:
                    if app_id is not None:
                        artifacts.append({
                            "type": 4020,
                            "name": product_name,
                            "desc": None,
                            "link": website,
                            "domain": website
                        })
                elif market_type == 4030:
                    if app_id is not None:
                        artifacts.append({
                            "type": 4030,
                            "name": product_name,
                            "desc": None,
                            "link": website,
                            "domain": None
                        })
                elif market_type == 4040 or market_type == 4050:
                    if market_type == 4050 and (app_market == 16010 or app_market == 16020):
                        android_app = parser_db_util.find_android_market(app_market, app_id)
                        domain = android_app["apkname"] if android_app else None
                    else:
                        domain = app_id
                    if domain is not None:
                        artifacts.append({
                            "type": market_type,
                            "name": product_name,
                            "desc": desc,
                            "link": website,
                            "domain": domain
                        })

    # Funding state.
    roundStr = d('span.t-small.c-green').text().replace("(","").replace(")","").replace("获投状态:","").strip()
    fundingRound, roundStr = itjuzi_helper.getFundingRound(roundStr)
    logger.info("获投状态: %d, %s", fundingRound, roundStr)

    logger.info("")

    return {
        "shortName": company_short_name,
        "fullName": company_name if company_name is not None and company_name.strip() != "" else None,
        "productName": product_name,
        "description": desc,
        "brief": brief,
        "round": fundingRound,
        "roundDesc": roundStr,
        "companyStatus": company_status,
        "fundingType": funding_type,
        "locationId": locationId,
        "establishDate": establish_date,
        "logo": logo,
        "sourceId": company_key,
        "field": field,
        "subField": sub_field,
        "tags": tags,
        "type": 41010,
        "artifacts": artifacts
    }
コード例 #6
0
ファイル: itjuzi_company_parser.py プロジェクト: yujiye/Codes
def parse_artifact(item):
    """Extract the product artifacts listed on one crawled company page.

    Reads ``item["content"]`` (the page HTML) and walks every product entry.
    Only entries tagged ``u"网站"`` or ``"app"`` that carry a non-empty link
    are considered.

    Returns:
        ``None`` when ``item`` is ``None``; otherwise a list of dicts with
        the keys ``type``, ``name``, ``desc``, ``link`` and ``domain``.
    """
    if item is None:
        return None

    artifacts = []
    html = item["content"]
    d = pq(html)

    logger.info("*** artifact ***")
    for li in d('ul.list-prod> li> div.on-edit-hide'):
        l = pq(li)
        strtype = l('h4> span.tag').text().strip()
        if strtype != u"网站" and strtype != "app":
            continue

        # ``attr`` returns None when the anchor is missing or has no href;
        # such entries are skipped instead of crashing on ``.strip()``.
        href = l('h4> b> a').attr("href")
        if href is None:
            continue
        link = href.strip()
        if link == "":
            continue

        domain = None
        artifact_type, app_market, app_id = url_helper.get_market(link)
        if artifact_type == 4010:
            if strtype != u"网站":
                # A type-4010 link on an "app" entry never gets a domain and
                # was dropped by the original code as well.
                continue
            link = url_helper.url_normalize(link)
            flag, domain = url_helper.get_domain(link)
            if flag is None:
                continue
            if flag is False:
                domain = None
        else:
            if artifact_type == 4040:
                domain = app_id
            elif artifact_type == 4050:
                if app_market == 16010 or app_market == 16020:
                    android_app = parser_db_util.find_android_market(app_market, app_id)
                    if android_app:
                        domain = android_app["apkname"]
                else:
                    domain = app_id
            # Types 4020 and 4030 are kept even without a domain.
            if domain is None and artifact_type != 4030 and artifact_type != 4020:
                continue

        name = l('h4> b').text().strip()
        desc = l('p').text().strip()
        logger.info("type: %s, name: %s, link: %s, desc: %s" % (artifact_type, name, link, desc))
        artifacts.append({
            "type": artifact_type,
            "name": name,
            "desc": desc,
            "link": link,
            "domain": domain
        })

    logger.info("")
    return artifacts