def __init__(self):
    """Open a torndb connection and keep it on ``self.db``."""
    connection = dbcon.connect_torndb()
    self.db = connection
sys.path.append( os.path.join(os.path.split(os.path.realpath(__file__))[0], '../../util')) import loghelper, config, util, db, url_helper sys.path.append( os.path.join( os.path.split(os.path.realpath(__file__))[0], '../crawler/website')) import website #logger loghelper.init_logger("delete_dup_deal", stream=True) logger = loghelper.get_logger("delete_dup_deal") if __name__ == "__main__": conn = db.connect_torndb() items = conn.query( "select companyId,organizationId,count(*) cnt from deal group by companyId,organizationId having cnt > 1" ) for item in items: deals = conn.query( "select * from deal where companyId=%s and organizationId=%s", item["companyId"], item["organizationId"]) i = 0 for deal in deals: i += 1 if i > 1: logger.info("delete: %s - %s", deal["id"], deal["status"]) conn.execute("delete from deal_user_score where dealId=%s", deal["id"]) conn.execute("delete from deal_user_rel where dealId=%s",
os.path.join(os.path.split(os.path.realpath(__file__))[0], '../util')) # import loghelper import db from time import strftime, localtime from datetime import timedelta, datetime, date from bson.code import Code #logger # loghelper.init_logger("stat_person_summary", stream=True) # logger = loghelper.get_logger("stat_person_summary") #mongo mongo = db.connect_mongo() conn_sql = db.connect_torndb() # source table user_log = mongo.log.user_log # 每日用户统计 stat_person_daily_users = mongo.log.stat_person_daily_users stat_person_daily = mongo.log.stat_person_daily EXCLUDE_IP = "116.226.185.172" def stat_person_users_daily(): startDate = date.today() endDate = date.today() + timedelta(days=1) start = datetime.now() end = datetime.now()
def _restructure_investors(fullnames, investors):
    """Return ``(entries, changed)`` with recognised text entries structured.

    Entries whose type is not "text", texts shorter than 4 characters, texts
    for which ``contain_company_name`` returns False, and texts in which no
    investor full name is found are passed through unchanged.
    """
    entries = []
    changed = False
    for entry in investors:
        names = []
        if entry["type"] == "text":
            text = entry["text"]
            # Identity check kept on purpose: only an explicit False skips.
            if len(text) >= 4 and contain_company_name(text) is not False:
                names = find_all_investor_fullnames(fullnames, text)
        if len(names) > 0:
            entries.extend(remove_dup(structure(text, names)))
            changed = True
        else:
            entries.append(entry)
    return entries, changed


def _store_structured_investors(conn, funding, entries):
    """Insert missing funding/investor relations and rewrite the funding row."""
    funding_id = funding["id"]
    str_investors = json.dumps(entries, ensure_ascii=False)
    logger.info(str_investors)
    str_raw_investors = gen_raw_str(entries)
    logger.info(str_raw_investors)
    logger.info("")

    # The company row is only needed for logging; a missing row must not
    # abort the whole batch.
    corp = conn.get("select * from company where id=%s", funding["companyId"])
    if corp is not None:
        logger.info("companyId: %s, companyName: %s", corp["id"],
                    corp["name"])
    else:
        logger.info("companyId: %s not found", funding["companyId"])

    # Group the inserts and the update into one transaction. If anything
    # raises before the commit, the caller closes the connection and the
    # commit statement is never issued.
    conn.execute("set autocommit=0")
    for inv in entries:
        if inv["type"] != "investor":
            continue
        rel = conn.get(
            "select * from funding_investor_rel"
            " where (active is null or active='Y')"
            " and fundingId=%s and investorId=%s", funding_id, inv["id"])
        if rel is None:
            logger.info("insert rel: %s", inv["text"])
            conn.insert(
                "insert funding_investor_rel(fundingId,investorId,createUser,verify,active,createTime) values(%s,%s,139,'Y','Y',now())",
                funding_id, inv["id"])
    conn.update(
        "update funding set investorsRaw=%s, investors=%s where id=%s",
        str_raw_investors, str_investors, funding_id)
    conn.execute("commit")


def process():
    """Structure the free-text investor entries of every funding row.

    Walks the ``funding`` table in id order, 1000 rows per query. For each
    row with a non-empty ``investors`` JSON value, text entries that contain
    known investor full names are replaced by structured entries; when at
    least one entry changed, the relations and the funding row are written
    back. The connection is always closed, even when a row fails.
    """
    last_id = -1  # highest funding id seen so far (paging cursor)
    conn = db.connect_torndb()
    try:
        fullnames = conn.query(
            "select a.investorId,a.name,i.website,i.name as shortname from investor_alias a "
            " join investor i on a.investorId=i.id"
            " where a.type=12010 "
            " and (a.active is null or a.active='Y')"
            " and (i.active is null or i.active='Y')"
            " and a.verify='Y'")
        while True:
            fs = conn.query(
                "select * from funding where id>%s order by id limit 1000",
                last_id)
            if not fs:
                break
            for f in fs:
                if f["id"] > last_id:
                    last_id = f["id"]
                str_investors = f["investors"]
                if str_investors is None or str_investors == "":
                    continue
                logger.info(str_investors)
                # Newlines inside the stored value are turned into commas
                # before the value is parsed as JSON.
                investors = json.loads(str_investors.replace("\n", ","))
                entries, changed = _restructure_investors(fullnames,
                                                          investors)
                if changed:
                    logger.info(str_investors)
                    _store_structured_investors(conn, f, entries)
    finally:
        conn.close()
def _funding_year_stats(conn, year):
    """Return the list of ``{"item", "count"}`` statistics for one year.

    ``year`` is a four-digit year string; fundings are selected by
    ``publishDate`` when it is set and by ``fundingDate`` otherwise, within
    [year-01-01, (year+1)-01-01).
    """
    start = year + "-01-01"
    end = str(int(year) + 1) + "-01-01"
    fundings = conn.query(
        "select * from funding where (active is null or active !='N') and "
        "("
        "(publishDate is not null and publishDate>=%s and publishDate<%s)"
        " or "
        "(publishDate is null and fundingDate>=%s and fundingDate<%s)"
        ")", start, end, start, end)

    cnt_event = 0
    cnt_event_stock = 0
    cnt_event_nestock = 0
    cnt_d = 0
    cnt_f = 0
    # Only the number of distinct ids is reported, so sets are sufficient.
    corporate_ids = set()
    company_ids = set()
    stock_corporates = set()
    nonstock_corporates = set()
    domestic_corporates = set()
    foreign_corporates = set()

    for funding in fundings:
        if funding is None or funding["corporateId"] is None:
            continue
        corporate_id = funding["corporateId"]
        corporate = conn.get(
            "select * from corporate where id=%s and (active is null or active !='N')",
            corporate_id)
        cs = conn.query(
            "select id from company where corporateId=%s and (active is null or active!='N')",
            corporate_id)
        # An event counts only when its corporate is active and has at
        # least one active company.
        if len(cs) == 0 or corporate is None:
            continue

        cnt_event += 1
        # Rounds 1105/1106/1110 feed the "上市" (listed) counters.
        if funding["round"] in (1105, 1106, 1110):
            cnt_event_stock += 1
            stock_corporates.add(corporate_id)
        else:
            cnt_event_nestock += 1
            nonstock_corporates.add(corporate_id)
        corporate_ids.add(corporate_id)

        # presumably locationId above 370 means a location outside the
        # country (this bucket is reported as "国外") — confirm.
        if corporate["locationId"] is not None and corporate["locationId"] > 370:
            cnt_f += 1
            foreign_corporates.add(corporate_id)
        else:
            cnt_d += 1
            domestic_corporates.add(corporate_id)

        # NOTE(review): companies are collected for counted events only;
        # confirm this nesting against the original layout.
        for c in cs:
            company_ids.add(c["id"])

    return [
        {"item": year + "披露总融资事件数", "count": cnt_event},
        {"item": year + "披露总上市融资事件数", "count": cnt_event_stock},
        {"item": year + "披露总非上市融资事件数", "count": cnt_event_nestock},
        {"item": year + "披露总上市融资corporate数", "count": len(stock_corporates)},
        {"item": year + "披露总非上市融资corporate数", "count": len(nonstock_corporates)},
        {"item": year + "披露总涉及项目数", "count": len(company_ids)},
        {"item": year + "披露总涉及corporate数", "count": len(corporate_ids)},
        {"item": year + "披露国内融资事件数", "count": cnt_d},
        {"item": year + "披露国外融资事件数", "count": cnt_f},
        {"item": year + "披露总国内涉及corporate数", "count": len(domestic_corporates)},
        {"item": year + "披露总国外涉及corporate数", "count": len(foreign_corporates)},
    ]


def fundingP_check(years=("2013", "2014", "2015", "2016", "2017")):
    """Collect per-year funding statistics.

    Args:
        years: iterable of four-digit year strings to report on, in output
            order. Defaults to 2013 through 2017.

    Returns:
        A flat list of ``{"item": label, "count": n}`` dicts, eleven per
        year, where each label is the year followed by a fixed description.
    """
    de = []
    conn = db.connect_torndb()
    try:
        # NOTE(review): the mongo connection is opened and closed but never
        # queried in this function; kept so connection side effects stay
        # the same.
        mongo = db.connect_mongo()
        try:
            for year in years:
                de.extend(_funding_year_stats(conn, year))
        finally:
            mongo.close()
    finally:
        conn.close()
    return de
def _find_establish_time(collection_gongshang, name):
    """Return ``establishTime`` of the gongshang record named ``name``.

    Returns None when no record exists, the record has no such key, or the
    stored value itself is None.
    """
    record = collection_gongshang.find_one({"name": name})
    if record is None or "establishTime" not in record:
        return None
    return record["establishTime"]


def _patch_corporate_round(conn, corporate):
    """Set the corporate's round from its latest active funding.

    When there is no active funding and a round is currently stored, the
    round is reset to -1.
    """
    funding = conn.get(
        "select * from funding where corporateId=%s and (active is null or active !='N') "
        "order by fundingDate desc limit 1", corporate["id"])
    if funding is not None:
        conn.update("update corporate set round=%s where id=%s",
                    funding["round"], corporate["id"])
    elif corporate["round"] is not None:
        conn.update("update corporate set round=-1 where id=%s",
                    corporate["id"])


def patch_company_establish_date(company_id):
    """Fill in a company's corporate establish date and funding round.

    The establish date is looked up in the ``gongshang`` collection, first
    by the corporate's full name and then by each active corporate alias
    until one yields a value. A found date is written to
    ``corporate.establishDate``; a failure of that write is logged and does
    not stop the round patch. Nothing is done when the company does not
    exist or has no corporate. Both connections are always closed.

    Args:
        company_id: id of the row in the ``company`` table.
    """
    conn = db.connect_torndb()
    try:
        mongo = db.connect_mongo()
        try:
            collection_gongshang = mongo.info.gongshang
            company1 = conn.get("select * from company where id=%s",
                                company_id)
            if company1 is None:
                logger.info("Company: %s not found", company_id)
                return
            corporate_id = company1["corporateId"]
            if corporate_id is None:
                return

            corporate = conn.get("select * from corporate where id=%s",
                                 corporate_id)
            establish_date = None
            if corporate is not None and corporate["fullName"] is not None:
                establish_date = _find_establish_time(collection_gongshang,
                                                      corporate["fullName"])

            if establish_date is None:
                # Fall back to the aliases; the first one with a date wins.
                aliases = conn.query(
                    "select * from corporate_alias where "
                    "(active is null or active !='N') and corporateId=%s",
                    corporate_id)
                for alias in aliases:
                    establish_date = _find_establish_time(
                        collection_gongshang, alias["name"])
                    if establish_date is not None:
                        break

            if establish_date is not None:
                logger.info("Company: %s establishDate: %s", company_id,
                            establish_date)
                try:
                    conn.update(
                        "update corporate set establishDate=%s where id=%s",
                        establish_date, corporate_id)
                except Exception as e:
                    # Best effort: keep going, but leave a trace of the
                    # failure instead of swallowing it silently.
                    logger.info(
                        "Company: %s establishDate update failed: %s",
                        company_id, e)

            # patch round
            if corporate is not None:
                _patch_corporate_round(conn, corporate)
        finally:
            mongo.close()
    finally:
        conn.close()