def send_verify_code(mobile, content):
    """Send an SMS message through the combmobile HTTP gateway.

    Numbers starting with "110" short-circuit to True without contacting
    the gateway.

    Args:
        mobile: Destination phone number as a string.
        content: Message text; it is encoded as GB2312 before posting.

    Returns:
        True when the number starts with "110" or the gateway response
        carries a positive integer return code. False when the code is not
        positive, the response root tag is unexpected, or sending/parsing
        raised an error (the traceback is printed).
    """
    if mobile.startswith("110"):
        return True

    url = "http://sms.combmobile.com/sdk/gxmt"
    flag = False
    try:
        payload = {
            "sn": SN,
            "pwd": util.md5str(SN + PWD).upper(),
            "mobile": mobile,
            "content": content.encode("gb2312"),
            "ext": "",
            "stime": "",
            "rrid": "",
        }
        # Bound the wait so a stalled gateway cannot block the caller forever.
        r = requests.post(url, data=payload, timeout=10)
        root = etree.fromstring(r.text.encode("utf8"))
        if root.tag == '{http://tempuri.org/}string':
            return_code = root.text
            if int(return_code) > 0:
                flag = True
                logger.info("sent ok. return code: %s", return_code)
            else:
                logger.info("sent fail. return code: %s", return_code)
    except Exception:
        # Best effort: report the failure and return False. Catching
        # Exception (not everything) lets KeyboardInterrupt/SystemExit
        # propagate.
        traceback.print_exc()
    return flag
def insert(shortname, name,brief,website):
    """Store a company under source 13100 together with its two names.

    The "(开业)" marker is removed from ``name`` first; the result of
    ``util.md5str`` on the cleaned name is used as the source id. The
    cleaned name is saved with name type 12010 and ``shortname`` with
    12020. ``website`` is accepted but not used by this function.
    """
    full_name = name.replace("(开业)","")
    source_key = util.md5str(full_name)
    sid = parser_db_util.save_company_yitai(
        shortname, full_name, 13100, source_key, brief)
    logger.info("sid:%s->sourceId:%s", sid, source_key)

    # Full name first, then short name, as separate name records.
    for company_name, name_type in ((full_name, 12010), (shortname, 12020)):
        parser_db_util.save_source_company_name(sid, company_name, name_type)
def findc(aname):
    """Find the company linked to a source-13100 record by its full name.

    The "(开业)" marker is removed from ``aname`` and ``util.md5str`` of the
    result is used to look up the ``source_company`` row.

    Args:
        aname: Company full name.

    Returns:
        A ``(rvalue, companyId)`` tuple. ``rvalue`` is 1 when the linked
        ``company`` row exists and its ``active`` value is "A", "P" or "N";
        otherwise 0.

    Note:
        When no ``source_company`` row matches, "wrong" is logged and the
        process is terminated through ``exit()``.
    """
    aname = aname.replace("(开业)", "")
    sourceId = util.md5str(aname)

    conn = db.connect_torndb()
    try:
        sc = conn.get(
            "select * from source_company where source=13100 and sourceId=%s",
            sourceId)
        if sc is None:
            logger.info("wrong")
            exit()
        companyId = sc["companyId"]
        company = conn.get("select * from company where id=%s", companyId)
    finally:
        # Release the connection on every path, including exit() and
        # query errors.
        conn.close()

    rvalue = 0
    if company is not None and company["active"] in ("A", "P", "N"):
        rvalue = 1
    return rvalue, companyId
def __init__(self, id, author, nickname, like_num, content):
    """Populate the instance from raw field values.

    ``id``, ``author`` and ``like_num`` are converted with ``int``;
    ``nickname`` and ``content`` are stored unchanged, and
    ``md5str(content)`` is kept in ``md5_content``.
    """
    # Convert first (same evaluation order as the assignments below).
    numeric_id = int(id)
    numeric_author = int(author)
    like_count = int(like_num)
    content_digest = md5str(content)

    self.id = numeric_id
    self.author = numeric_author
    self.nickname = nickname
    self.num_likes = like_count
    self.content = content
    self.md5_content = content_digest
def insert(shortname, name, brief, website):
    """Store a company under source 13100 and, when usable, its website.

    The "(开业)" marker is removed from ``name``; ``util.md5str`` of the
    cleaned name is the source id. The cleaned name is saved with name type
    12010 and ``shortname`` with 12020.

    If ``website`` is non-blank it is passed through
    ``url_helper.url_normalize``, given an "http://" prefix when it does not
    already start with an http/https scheme, and classified with
    ``url_helper.get_market``. Only results of type 4010 whose URL does not
    contain 'sse.com' (past the first character) are saved as an artifact.

    Args:
        shortname: Short company name; also used as the artifact name.
        name: Full company name.
        brief: Company description passed to ``save_company_yitai``.
        website: Raw website string, may be None or blank.
    """
    name = name.replace("(开业)", "")
    sourceId = util.md5str(name)
    sid = parser_db_util.save_company_yitai(shortname, name, 13100, sourceId,
                                            brief)
    logger.info("sid:%s->sourceId:%s", sid, sourceId)
    parser_db_util.save_source_company_name(sid, name, 12010)
    parser_db_util.save_source_company_name(sid, shortname, 12020)

    if website is None or website.strip() == "":
        return
    website = url_helper.url_normalize(website)
    if website is None or website == "":
        return

    # The previous check relied on the truthiness of find("https://"), which
    # only worked because 0 (a match at the start) is falsy. Test the prefix
    # explicitly instead.
    if not website.startswith(("http://", "https://")):
        website = "http://" + website

    artifact_type, market, app_id = url_helper.get_market(website)
    if artifact_type != 4010:
        return
    if website.find('sse.com') > 0:
        return

    artifact = {
        "sourceCompanyId": sid,
        "name": shortname,
        "description": None,
        "link": website,
        "domain": app_id,
        "type": artifact_type
    }
    parser_db_util.save_artifacts_standard(sid, [artifact])
def insert_funding(sid, roundstr, inv, fundingDate, investor):
    """Normalize one funding record and save it with its single investor.

    Whitespace is removed from ``inv`` and a set of approximate amount
    phrases is mapped to a concrete amount string; the truncated round
    labels "re-A轮" / "re-IPO" are mapped to "Pre-A" / "Pre-IPO". The round
    and amount are then interpreted by ``itjuzi_helper.getFundingRound`` and
    ``itjuzi_helper.getMoney`` and the record is saved through
    ``parser_db_util.save_funding_standard`` (using ``download_crawler``
    from the enclosing scope).

    Args:
        sid: Source company id the funding belongs to.
        roundstr: Round label text.
        inv: Amount text.
        fundingDate: Funding date stored as-is in the record.
        investor: Investor name; ``util.md5str`` of it is the investor's
            source id.

    Any error is logged together with the inputs and then suppressed, so
    the caller can continue with the next record.
    """
    # (canonical amount, phrases that should be read as that amount)
    amount_aliases = (
        ("1000万人民币",
         ["超千万人民币", "千万人民币", "近千万人民币", "过千万人民币", "上千万人民币",
          "1千万人民币"]),
        ("1亿人民币",
         ["超亿人民币", "近亿人民币", "过亿人民币", "上亿人民币", "亿人民币", "一亿人民币",
          "亿人民币及以上人民币"]),
        ("1000万美元",
         ["超千万美元", "千万美元", "近千万美元", "过千万美元", "上千万美元", "1千万美元"]),
        ("100万美元",
         ["百万美元", "近百万美元", "过百万美元", "上百万美元", "1百万美元"]),
        ("100万人民币",
         ["百万人民币", "近百万人民币", "过百万人民币", "上百万人民币", "1百万人民币"]),
    )
    try:
        inv = "".join(inv.split())
        for canonical, phrases in amount_aliases:
            if inv in phrases:
                inv = canonical
                break

        if roundstr == "re-A轮":
            roundstr = "Pre-A"
        elif roundstr == "re-IPO":
            roundstr = "Pre-IPO"

        fundingRound, roundStr = itjuzi_helper.getFundingRound(
            unicode(roundstr))
        currency, investment, precise = itjuzi_helper.getMoney(unicode(inv))

        source_funding = {
            "sourceCompanyId": sid,
            "preMoney": None,
            "postMoney": None,
            "investment": investment,
            "precise": precise,
            "round": fundingRound,
            "roundDesc": roundStr,
            "currency": currency,
            "fundingDate": fundingDate,
            "newsUrl": None
        }
        source_investors = [{
            "name": investor,
            "website": None,
            "description": None,
            "logo_url": None,
            "stage": None,
            "field": None,
            "type": 10020,
            "source": 13100,
            "sourceId": util.md5str(investor)
        }]
        parser_db_util.save_funding_standard(source_funding, download_crawler,
                                             source_investors)
    except Exception:
        # Best effort per record. The handler used to reference an undefined
        # name (fdate); log the actual parameter and keep the traceback so
        # the failure can be diagnosed.
        logger.info("%s/%s/%s/%s", roundstr, inv, fundingDate, investor,
                    exc_info=True)
def insert(shortname,brief):
    """Store a company known only by its short name under source 13120.

    ``util.md5str`` of the unicode short name is the source id; the short
    name is also saved as a name record of type 12020.

    Returns:
        The id returned by ``parser_db_util.save_company_yitai``.
    """
    source_key = util.md5str(unicode(shortname))
    company_sid = parser_db_util.save_company_yitai(
        shortname, None, 13120, source_key, brief)
    logger.info("sid:%s->sourceId:%s", company_sid, source_key)
    parser_db_util.save_source_company_name(company_sid, shortname, 12020)
    return company_sid
def insert(shortname, name, brief, fullNames):
    """Store a company under source 13100 with all of its known names.

    The "(开业)" marker is removed from ``name``; ``util.md5str`` of the
    cleaned name is the source id. ``shortname`` is saved with name type
    12020, then the cleaned name followed by every entry of ``fullNames``
    is saved with type 12010.

    Returns:
        The id returned by ``parser_db_util.save_company_yitai``.
    """
    cleaned_name = name.replace("(开业)", "")
    source_key = util.md5str(cleaned_name)
    company_sid = parser_db_util.save_company_yitai(
        shortname, cleaned_name, 13100, source_key, brief)

    parser_db_util.save_source_company_name(company_sid, shortname, 12020)

    every_full_name = [cleaned_name] + fullNames
    for one_name in every_full_name:
        parser_db_util.save_source_company_name(company_sid, one_name, 12010)
    return company_sid
def add_2_company_list(name):
    """Add a company full name to ``mongo.info.company_idx`` if it is new.

    The name is normalized with ``name_helper.company_name_normalize``.
    Nothing happens when normalization yields None or when
    ``name_helper.name_check`` does not return True for both of its flags.
    Otherwise a document keyed by ``util.md5str`` of the name is inserted
    unless one with the same ``name_md5`` already exists.
    """
    normalized = name_helper.company_name_normalize(name)
    if normalized is None:
        return

    is_chinese, is_company = name_helper.name_check(normalized)
    if not (is_chinese is True and is_company is True):
        return

    logger.info("fullname: %s", normalized)
    digest = util.md5str(normalized)
    existing = mongo.info.company_idx.find_one({"name_md5": digest})
    if existing is not None:
        return

    mongo.info.company_idx.insert_one({
        "name": normalized,
        "name_md5": digest,
        "createTime": datetime.datetime.utcnow()
    })
def fragmentstr(inputstr, type):
    """Cut ``inputstr`` into consecutive BUFLEN-sized pieces.

    Args:
        inputstr: The string to split.
        type: Value copied into every fragment under the 'type' key.

    Returns:
        A list with one dict per piece, in input order. Each dict holds
        'type', 'md5' (``util.md5str`` of the whole input), 'blockId'
        (0-based position), 'totalBlock' (number of pieces) and 'buf' (the
        piece itself). An empty input gives an empty list.
    """
    digest = util.md5str(inputstr)

    # Slice until a slice comes back empty.
    chunks = []
    offset = 0
    chunk = inputstr[offset:offset + BUFLEN]
    while len(chunk) != 0:
        chunks.append(chunk)
        offset += BUFLEN
        chunk = inputstr[offset:offset + BUFLEN]

    total = len(chunks)
    return [
        {'type': type, 'md5': digest, 'blockId': position,
         'totalBlock': total, 'buf': part}
        for position, part in enumerate(chunks)
    ]
def create_user(conn, item):
    """Replace an existing user with a freshly created one.

    The user ``item["userId"]`` is marked inactive ('D') with both verify
    flags reset. A new ``user`` row is then inserted, its password digest
    is stored, and a personal ``organization`` plus the
    ``user_organization_rel`` link are created for it.

    Args:
        conn: Database connection offering ``insert`` and ``update``.
        item: Mapping with "userId", "username", "email", "phone",
            "userIdentify", "password" and optionally "position".

    Returns:
        The id of the newly inserted user row.
    """
    # NOTE(review): the password digest is util.md5str over a fixed salt,
    # the new user id and the password. It is left unchanged here because
    # altering the scheme would invalidate whatever verifies these digests;
    # consider a dedicated password-hashing scheme.
    SALT = "24114581331805856724"

    # disable old user
    user_id = item["userId"]
    conn.update(
        "update user set active='D', phoneVerify='N', emailVerify='N' where id=%s",
        user_id)

    # user
    # The flags are 'Y' only for non-blank values. strip must be *called*:
    # comparing the bound method to "" was always true, so blank phone or
    # e-mail values were marked as verified.
    phone = item["phone"]
    email = item["email"]
    phoneVerify = 'Y' if phone is not None and phone.strip() != "" else 'N'
    emailVerify = 'Y' if email is not None and email.strip() != "" else 'N'

    new_user_id = conn.insert(
        "insert user(username,position,email,phone,userIdentify,loginFailTimes,"
        "phoneVerify,emailVerify,active,verifiedInvestor,createTime) values"
        "(%s,%s,%s,%s,%s,0,"
        "%s,%s,'Y','N',now())", item["username"], item.get("position"),
        email, phone, item["userIdentify"], phoneVerify, emailVerify)

    # The digest includes the new id, so it can only be set after the insert.
    password = util.md5str(SALT + str(new_user_id) + item["password"])
    conn.update("update user set password=%s where id=%s", password,
                new_user_id)

    # organization (personal)
    org_id = conn.insert(
        "insert organization(name,type,status,grade,active,createUser,createTime,modifyUser,modifyTime) "
        "values(%s, 17010,31010,33020,'Y',%s,now(),%s,now())",
        item["username"], new_user_id, new_user_id)

    # user_organization_rel
    conn.insert(
        "insert user_organization_rel(userId,organizationId,active,createTime) values(%s,%s,'Y',now())",
        new_user_id, org_id)

    return new_user_id
def parserDevelop_save(source_company_id, item):
    """Walk a company's history timeline and log how each entry classifies.

    ``item["content"]`` is parsed with ``pq`` and every
    ``.history_ul > li`` element is inspected. For each entry the date is
    assembled as ``YYYY-MM-DD`` from the ``.date_year`` / ``.date_day``
    texts; an entry without a year keeps the date of the previous entry.
    Entries whose type icon title is u'其他' are logged either as news (link
    longer than 10 characters) or as a footprint.

    Nothing is persisted by this variant: it only logs.

    Args:
        source_company_id: Id of the source company (not used while saving
            is disabled).
        item: Mapping with the HTML under "content", or None to do nothing.
    """
    if item is None:
        return
    logger.info("*** Development ***")

    # English month abbreviations as they appear after the year text.
    month_numbers = {
        'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05',
        'Jun': '06', 'June': '06', 'Jul': '07', 'July': '07', 'Aug': '08',
        'Sep': '09', 'Sept': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12',
    }

    doc = pq(item["content"])
    # news & footprint
    entries = doc('.history_ul > li')

    d_date = None  # carried over: an entry without a year reuses this value
    for li in entries:
        try:
            entry = pq(li)
            d_day = entry('.date_day').text()
            d_year = entry('.date_year').text()
            d_month = None
            if d_year is not None:
                # Month text starts at offset 5; the first 4 chars are the year.
                raw_month = d_year[5:].strip()
                d_month = month_numbers.get(raw_month, raw_month)
                d_year = d_year[0:4]

            d_type = entry('div.li_type_icon').attr('title')
            d_url = entry('div.li_desc > p').attr('data-href')

            if d_year is not None and d_year != '':
                d_date = d_year + '-' + d_month + '-' + d_day
            logger.info("date: %s", d_date)

            if d_type != u'其他':
                continue
            logger.info("********其他")

            if d_url is not None and d_url.strip() != '' and len(d_url) > 10:
                # news
                logger.info(d_url)
            else:
                # footprint (saving is disabled in this variant)
                logger.info("FOOTPRINT")
        except Exception as ex:
            # One malformed entry must not stop the remaining ones.
            logger.exception(ex)
def parser_develop(d, company_key, source_company_id): logger.info('*********** parsing develop **************') #news & footprint lis = d('.history_ul > li') develop_rank = 0 if len(lis) > 0: for li in lis: try: d_day = d('.date_day').text() d_year = d('.date_year').text() if d_year is not None: d_month = d_year[5:].strip() if d_month == 'Jan': d_month = '01' elif d_month == 'Feb': d_month = '02' elif d_month == 'Mar': d_month = '03' elif d_month == 'Apr': d_month = '04' elif d_month == 'May': d_month = '05' elif d_month == 'Jun' or d_month == 'June': d_month = '06' elif d_month == 'Jul' or d_month == 'July': d_month = '07' elif d_month == 'Aug': d_month = '08' elif d_month == 'Sep' or d_month == 'Sept': d_month = '09' elif d_month == 'Oct': d_month = '10' elif d_month == 'Nov': d_month = '11' elif d_month == 'Dec': d_month = '12' d_year = d_year[0:4] d_type = d('div.li_type_icon').attr('title') d_title = d('div.li_desc > p').text() d_url = d('div.li_desc > p').attr('data-href') d_key = util.md5str(d_url) if d_year is None or d_year == '': d_date = None else: d_date = d_year+'-'+d_month+'-'+d_day develop_rank += 1 develop_key = str(company_key)+'_'+str(develop_rank) if d_type == u'资本': investors = d('.desc_intro').text() investors = ''.join(investors) try: (investors,) = util.re_get_result(u'融资机构:(.*?) ;', investors) except Exception, e: investors = None investment = None round = None unit = None currency = '' precise = 'Y' funding = d_title.replace('获得','').replace('轮融资', '') logger.info(funding) try: funding = funding.replace('元', '') if u'亿' in funding: f_arr = funding.split(u'亿') if '.' 
in f_arr[0]: investment = int(f_arr[0].replace('.', '')) * 1000 else: investment = int(f_arr[0]) * 10000 round = f_arr[1] unit = 0 else: try: (investment,) = util.re_get_result('(\d+)', funding) except Exception, e: pass if investment is not None: round = funding.split(investment)[1] if currency == '美': currency = 3010 else: currency = 3020 if '¥'in str(investment): currency = 3020 elif '$' in str(investment): currency = 3020 investment = str(investment).replace('$', '').replace('¥', '')\ .replace('美金', '').replace('美', '') if u'数' in funding: precise = 'N' if round is None: round = funding.replace('数', '') if u'千' in round: investment = 1000 unit = 0 elif u'百' in round: investment = 100 unit = 0 elif u'十' in round: investment = 10 unit = 0 if unit == 0: investment = int(investment) * 10000 if investment is None: investment = 0 if investment == 0: precise ='N' elif investment < 1000: investment = int(investment) * 10000 roundDesc = round round = round.replace('万', '').replace('千', '').replace('百', '').replace('十', '') round = round.replace('美', '') if u'天使' in round: round = 1010 elif 'Pre-A' in round: round = 1020 elif 'A' in round: round = 1030 elif 'B' in round: round = 1040 elif 'C' in round: round = 1050 elif 'D' in round: round = 1060 elif 'E' in round: round = 1070 elif 'F' in round: round = 1080 else: round = 0 logger.info(investment) logger.info(round) source_funding ={ "sourceCompanyId": source_company_id, "preMoney": None, "postMoney": None, "investment": investment, "round": round, "roundDesc": roundDesc, "currency": currency, "precise": precise, "fundingDate": d_date, } # logger.info(source_funding) logger.info(investors) investor_list = [] if investors is not None: investors_arr = investors.split(',') investor_key = 0 for investor in investors_arr: if investor != '': investor_key += 1 sourceId = str(company_key)+'_'+ str(investor_key) if '个人' in investor: type = 10010 else: type = 10020 invstor_content = {'source': source, 'sourceId': sourceId, 
'logo_url': None, 'name': investor, 'website': None, 'description': None, 'stage': None, 'field': None, 'type': type, 'source': source, 'sourceId': sourceId } investor_list.append(invstor_content) parser_util.insert_source_funding(source_funding, investor_list) except Exception,e: logger.exception(e) continue if d_type == u'其他': # news if d_url is not None or d_url != '' or len(d_url) > 10: try: r = requests.get(d_url, timeout= 10) r.encoding = r.apparent_encoding content = r.text # print content[0:500] source_news = {"source": source, "news_key": d_key, "company_key": company_key, "url": d_url, "title": d_title, "date": d_date, "domain": 'lagou', "content": content } parser_util.insert_source_news(source_news) except Exception,e : pass #footprint else: source_footprint = {"source": source, "sourceCompanyId": source_company_id, "footDate":d_date, "description": d_title, } parser_util.insert_source_footprint(source_footprint)
def insert(name):
    """Store a company known only by its full name under source 13097.

    The "(开业)" marker is removed from ``name``; ``util.md5str`` of the
    cleaned name is the source id, and the cleaned name is also saved as a
    name record of type 12010.
    """
    full_name = name.replace("(开业)","")
    source_key = util.md5str(full_name)
    company_sid = parser_db_util.save_company_fullName(
        full_name, 13097, source_key)
    logger.info("sid:%s->sourceId:%s", company_sid, source_key)
    parser_db_util.save_source_company_name(company_sid, full_name, 12010)
def process(org):
    """Import an organization's cold-call e-mails as ``sourcedeal`` rows.

    Messages are fetched one batch at a time with ``email_reader.receive``
    until it returns nothing. For each message the subject is stripped of
    reply/forward prefixes to form the title. A message is skipped when a
    ``sourcedeal`` row with the same org, title digest and sender already
    exists. Otherwise the deal, its forward record and its accepted
    attachments are stored.

    Args:
        org: Mapping with "id", "name" and the "coldcall_*" IMAP settings.
            Nothing is done when "coldcall_imap_server" is None.
    """
    if org["coldcall_imap_server"] is None:
        return
    logger.info("orgId: %s, orgName: %s", org["id"], org["name"])

    # Matches reply/forward markers (RE, FW, FWD, 转发, ...) and stray
    # brackets so they can be removed from the subject.
    re_name = re.compile(
        r'([\[\(] *)?(RE?S?|FYI|RIF|I|FS|VB|RV|ENC|ODP|PD|YNT|ILT|SV|VS|VL|AW|WG|ΑΠ|ΣΧΕΤ|ΠΡΘ|תגובה|הועבר|主题|转发|FWD?) *([-:;)\]][ :;\])-]*|$)|\]+ *$',
        re.IGNORECASE)
    # Only attachments whose lower-cased name ends with one of these are kept.
    accepted_suffixes = ("pdf", "rar", "zip", "7z", "ppt", "pptx", "doc",
                         "docx", "xls", "xlsx")

    while True:
        msgs = email_reader.receive(org["coldcall_imap_server"],
                                    org["coldcall_imap_port"],
                                    org["coldcall_username"],
                                    org["coldcall_password"],
                                    one=True)
        if len(msgs) == 0:
            break

        for msg in msgs:
            if msg["html"] is not None:
                parser = html2text.HTML2Text()
                parser.ignore_emphasis = True
                parser.single_line_break = True
                msg["html_text"] = parser.handle(msg["html"])
            else:
                msg["html_text"] = None

            logger.info(msg["subject"])
            logger.info(msg["from"])
            logger.info(msg["to"])
            logger.info(msg["cc"])
            logger.info("attachments=%d" % len(msg["attachments"]))
            for attach in msg["attachments"]:
                logger.info(attach.name)

            title = re_name.sub('', msg["subject"]).strip()
            title_md5 = util.md5str(title)

            # Duplicate check: same org, same title digest, same sender.
            conn = db.connect_torndb()
            try:
                cc = conn.get(
                    "select * from sourcedeal where orgId=%s and titleMd5=%s and origin=%s limit 1",
                    org["id"], title_md5, msg["from"])
            finally:
                conn.close()
            if cc is not None:
                logger.info("%s Exists!" % title)
                continue

            # Prefer the text rendered from HTML, then the plain body.
            content = msg["html_text"]
            if content is None:
                content = msg["body"]
            if content is None:
                content = ""
            content = content.strip()
            if len(content) > 20000:
                content = content[0:20000]

            sponsor_id = find_user(org["id"], msg["from"])
            logger.info("sponsor_id=%s" % sponsor_id)
            assignee_id = find_user(org["id"], msg["cc"])
            logger.info("assignee_id=%s" % assignee_id)

            conn = db.connect_torndb()
            try:
                cc_id = conn.insert(
                    "insert sourcedeal(title,titleMd5,content,orgId,createTime,origin,assignee,sponsor) "
                    "values(%s,%s,%s,%s,%s,%s,%s,%s)", title, title_md5,
                    content, org["id"], msg["date"], msg["from"], assignee_id,
                    sponsor_id)

                if assignee_id is None:
                    # Nobody was cc'ed: hand the deal to a random manager.
                    ids = get_investment_manager_ids(org["id"])
                    if ids:
                        assignee_id = choice(ids)
                        conn.update(
                            "update sourcedeal set assignee=%s where id=%s",
                            assignee_id, cc_id)
                        conn.insert(
                            "insert sourcedeal_forward(sourcedealId,toUserId,createTime) "
                            "values(%s,%s,%s)", cc_id, assignee_id,
                            msg["date"])
                    else:
                        # choice() would raise on an empty list and drop the
                        # attachments; keep the deal unassigned instead.
                        logger.info("no investment manager for orgId: %s",
                                    org["id"])
                else:
                    conn.insert(
                        "insert sourcedeal_forward(sourcedealId,fromUserId,toUserId,createTime) "
                        "values(%s,%s,%s,%s)", cc_id, sponsor_id, assignee_id,
                        msg["date"])

                for attach in msg["attachments"]:
                    if attach.name is None or attach.name.strip() == "":
                        continue
                    name = attach.name.strip()
                    if not name.lower().endswith(accepted_suffixes):
                        continue

                    content_type = mimetypes.guess_type(name)[0]
                    if content_type is None:
                        content_type = "application/octet-stream"
                    data = attach.getvalue()

                    logo_id = util.get_uuid()
                    logger.info("gridfs logo_id=%s" % logo_id)
                    oss2 = oss2_helper.Oss2Helper()
                    headers = {"Content-Type": content_type}
                    oss2.put(str(logo_id), data, headers=headers)
                    conn.insert(
                        "insert sourcedeal_file(sourcedealId,filename,fileId,createTime) "
                        "values(%s,%s,%s,%s)", cc_id, name, logo_id,
                        msg["date"])
            finally:
                # Release the connection even when an insert or upload fails.
                conn.close()
msg["html_text"] = parser.handle(msg["html"]) else: msg["html_text"] = None logger.info(msg["subject"]) logger.info(msg["from"]) logger.info(msg["to"]) logger.info(msg["cc"]) #logger.info(msg["body"]) #logger.info(msg["html_text"]) logger.info("attachments=%d" % len(msg["attachments"])) for attach in msg["attachments"]: logger.info(attach.name) name = re_name.sub('', msg["subject"]).strip() name_md5 = util.md5str(name) cc = conn.get( "select * from coldcall where organizationId=%s and nameMd5=%s limit 1", org["id"], name_md5) if cc is not None: logger.info("%s Exists!" % name) continue content = msg["html_text"] if content is None: content = msg["body"] if content is None: content = "" content = content.strip() cc_id = conn.insert(
def parserDevelop_save(source_company_id, item):
    """Save footprints found in a company's history timeline.

    ``item["content"]`` is parsed with ``pq`` and every
    ``.history_ul > li`` element is inspected. For each entry the date is
    assembled as ``YYYY-MM-DD`` from the ``.date_year`` / ``.date_day``
    texts; an entry without a year keeps the date of the previous entry.

    Only entries whose type icon title is u'其他' are handled:
      * with a link longer than 10 characters the entry is treated as news
        and merely logged (news import is disabled);
      * otherwise, if a date is known, it is saved as a footprint through
        ``parser_db_util.save_footprints``.

    Entries of type u'资本' (funding) are intentionally ignored here.

    Args:
        source_company_id: Id of the source company the footprints belong to.
        item: Mapping with the HTML under "content", or None to do nothing.
    """
    if item is None:
        return
    logger.info("*** Development ***")

    # English month abbreviations as they appear after the year text.
    month_numbers = {
        'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05',
        'Jun': '06', 'June': '06', 'Jul': '07', 'July': '07', 'Aug': '08',
        'Sep': '09', 'Sept': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12',
    }

    doc = pq(item["content"])
    # news & footprint
    entries = doc('.history_ul > li')

    d_date = None  # carried over: an entry without a year reuses this value
    for li in entries:
        try:
            entry = pq(li)
            d_day = entry('.date_day').text()
            d_year = entry('.date_year').text()
            d_month = None
            if d_year is not None:
                # Month text starts at offset 5; the first 4 chars are the year.
                raw_month = d_year[5:].strip()
                d_month = month_numbers.get(raw_month, raw_month)
                d_year = d_year[0:4]

            d_type = entry('div.li_type_icon').attr('title')
            d_title = entry('div.li_desc > p').text()
            d_url = entry('div.li_desc > p').attr('data-href')

            if d_year is not None and d_year != '':
                d_date = d_year + '-' + d_month + '-' + d_day
            logger.info("date: %s", d_date)

            if d_type != u'其他':
                continue
            logger.info("********其他")

            if d_url is not None and d_url.strip() != '' and len(d_url) > 10:
                # news: only logged, fetching and storing is disabled
                logger.info(d_url)
                continue

            # footprint
            logger.info("FOOTPRINT")
            if d_date is None:
                continue
            source_footprint = [{
                "source": SOURCE,
                "sourceCompanyId": source_company_id,
                "footDate": d_date,
                "footDesc": d_title,
            }]
            logger.info(
                json.dumps(source_footprint,
                           ensure_ascii=False,
                           cls=util.CJsonEncoder))
            parser_db_util.save_footprints(source_company_id, source_footprint)
        except Exception as ex:
            # One malformed entry must not stop the remaining ones.
            logger.exception(ex)