예제 #1
0
    def __init__(self):
        """Store the connection returned by ``dbcon.connect_torndb()`` on ``self.db``."""

        self.db = dbcon.connect_torndb()
예제 #2
0
sys.path.append(
    os.path.join(os.path.split(os.path.realpath(__file__))[0], '../../util'))
import loghelper, config, util, db, url_helper

sys.path.append(
    os.path.join(
        os.path.split(os.path.realpath(__file__))[0], '../crawler/website'))
import website

# Logger: initialise the "delete_dup_deal" logger through loghelper (with
# stream=True) and keep a module-level handle to it.
loghelper.init_logger("delete_dup_deal", stream=True)
logger = loghelper.get_logger("delete_dup_deal")

if __name__ == "__main__":
    conn = db.connect_torndb()
    items = conn.query(
        "select companyId,organizationId,count(*) cnt from deal group by companyId,organizationId having cnt > 1"
    )
    for item in items:
        deals = conn.query(
            "select * from deal where companyId=%s and organizationId=%s",
            item["companyId"], item["organizationId"])
        i = 0
        for deal in deals:
            i += 1
            if i > 1:
                logger.info("delete: %s - %s", deal["id"], deal["status"])
                conn.execute("delete from deal_user_score where dealId=%s",
                             deal["id"])
                conn.execute("delete from deal_user_rel where dealId=%s",
예제 #3
0
    os.path.join(os.path.split(os.path.realpath(__file__))[0], '../util'))
# import loghelper
import db

from time import strftime, localtime
from datetime import timedelta, datetime, date

from bson.code import Code

# Logger setup (currently disabled).
# loghelper.init_logger("stat_person_summary", stream=True)
# logger = loghelper.get_logger("stat_person_summary")

# Database handles obtained from the project's db helper module.
mongo = db.connect_mongo()
conn_sql = db.connect_torndb()
# Source collection.
user_log = mongo.log.user_log
# Daily user statistics collection.
stat_person_daily_users = mongo.log.stat_person_daily_users

stat_person_daily = mongo.log.stat_person_daily

# NOTE(review): presumably an IP address to leave out of the statistics; its
# use is not visible in this part of the file -- confirm.
EXCLUDE_IP = "116.226.185.172"


def stat_person_users_daily():
    startDate = date.today()
    endDate = date.today() + timedelta(days=1)
    start = datetime.now()
    end = datetime.now()
예제 #4
0
def _restructure_investors(fullnames, investors):
    """Replace free-text investor entries with structured ones where possible.

    Every entry whose ``type`` is ``"text"`` is inspected: if its text is at
    least 4 characters long, passes ``contain_company_name`` and matches at
    least one known investor full name, it is replaced by the de-duplicated
    output of ``structure``.  All other entries are kept unchanged.

    Args:
        fullnames: rows of verified investor full names (as queried by
            ``process``), passed through to ``find_all_investor_fullnames``.
        investors: the decoded ``funding.investors`` list of dicts.

    Returns:
        A ``(new_investors, changed)`` tuple; ``changed`` is True when at
        least one text entry was replaced.
    """
    restructured = []
    changed = False
    for entry in investors:
        if entry["type"] != "text":
            restructured.append(entry)
            continue

        text = entry["text"]
        # Short texts and texts without a company name are left as they are.
        if len(text) < 4 or contain_company_name(text) is False:
            restructured.append(entry)
            continue

        names = find_all_investor_fullnames(fullnames, text)
        if len(names) > 0:
            restructured.extend(remove_dup(structure(text, names)))
            changed = True
        else:
            restructured.append(entry)
    return restructured, changed


def process():
    """Structure the free-text investors of every funding row.

    Walks the ``funding`` table in id order, 1000 rows per batch.  For each
    row whose ``investors`` JSON contains text entries that match verified
    investor full names, the entries are restructured, missing
    ``funding_investor_rel`` rows are inserted and the funding's
    ``investors`` / ``investorsRaw`` columns are rewritten.  The writes for
    one funding are committed together.

    The database connection is always closed, even when an error aborts the
    run; an uncommitted transaction is then discarded with the connection.
    """
    last_id = -1  # highest funding id seen so far (paging cursor)
    conn = db.connect_torndb()
    try:
        fullnames = conn.query(
            "select a.investorId,a.name,i.website,i.name as shortname from investor_alias a "
            " join investor i on a.investorId=i.id"
            " where a.type=12010 "
            " and (a.active is null or a.active='Y')"
            " and (i.active is null or i.active='Y')"
            " and a.verify='Y'")
        while True:
            fs = conn.query(
                "select * from funding where id>%s order by id limit 1000",
                last_id)
            if len(fs) == 0:
                break
            for f in fs:
                funding_id = f["id"]
                if funding_id > last_id:
                    last_id = funding_id
                str_investors = f["investors"]
                if str_investors is None or str_investors == "":
                    continue
                logger.info(str_investors)
                # Newlines inside the stored value are turned into commas
                # before the value is decoded as JSON.
                investors = json.loads(str_investors.replace("\n", ","))

                _investors, changed = _restructure_investors(
                    fullnames, investors)
                if not changed:
                    continue

                logger.info(str_investors)
                _str_investors = json.dumps(_investors, ensure_ascii=False)
                logger.info(_str_investors)
                _str_raw_investors = gen_raw_str(_investors)
                logger.info(_str_raw_investors)
                logger.info("")

                # The company row is only used for logging, so a missing row
                # must not abort the update of the funding itself.
                corp = conn.get("select * from company where id=%s",
                                f["companyId"])
                if corp is None:
                    logger.info("companyId: %s, company not found",
                                f["companyId"])
                else:
                    logger.info("companyId: %s, companyName: %s", corp["id"],
                                corp["name"])

                conn.execute("set autocommit=0")
                for inv in _investors:
                    if inv["type"] != "investor":
                        continue
                    rel = conn.get(
                        "select * from funding_investor_rel"
                        " where (active is null or active='Y')"
                        " and fundingId=%s and investorId=%s", funding_id,
                        inv["id"])
                    if rel is None:
                        logger.info("insert rel: %s", inv["text"])
                        conn.insert(
                            "insert funding_investor_rel(fundingId,investorId,createUser,verify,active,createTime) values(%s,%s,139,'Y','Y',now())",
                            funding_id, inv["id"])
                conn.update(
                    "update funding set investorsRaw=%s, investors=%s where id=%s",
                    _str_raw_investors, _str_investors, funding_id)
                conn.execute("commit")
    finally:
        conn.close()
예제 #5
0
def fundingP_check(years=("2013", "2014", "2015", "2016", "2017")):
    """Collect per-year statistics about disclosed funding events.

    For every year, the active fundings whose ``publishDate`` (or, when that
    is null, ``fundingDate``) falls inside the calendar year are loaded.  A
    funding is counted only when its corporate is active and has at least
    one active company.

    Args:
        years: iterable of four-digit years (strings or ints).  Defaults to
            2013-2017, the years the original implementation hard-coded.

    Returns:
        A list of ``{"item": <label>, "count": <int>}`` dicts, eleven per
        year, in this order: total events, events with a round in
        (1105, 1106, 1110) (labelled as listed), other events, corporates
        with listed events, corporates with other events, companies
        involved, corporates involved, events with ``locationId`` <= 370 or
        null (labelled as domestic), events with ``locationId`` > 370
        (labelled as foreign), domestic corporates, foreign corporates.
    """
    stock_rounds = (1105, 1106, 1110)
    de = []
    # corporateId -> (corporate row or None, [active company ids]); the same
    # corporate usually has several fundings, so look it up only once.
    lookup_cache = {}

    conn = db.connect_torndb()
    try:
        for year in years:
            year = str(year)
            range_start = "%s-01-01" % year
            range_end = "%d-01-01" % (int(year) + 1)
            fundings = conn.query(
                "select * from funding where (active is null or active !='N') and "
                "("
                "(publishDate is not null and publishDate>=%s and publishDate<%s)"
                " or "
                "(publishDate is null and fundingDate>=%s and fundingDate<%s)"
                ")",
                range_start, range_end, range_start, range_end)

            cnt_event = 0
            cnt_event_stock = 0
            cnt_event_nestock = 0
            cnt_d = 0
            cnt_f = 0
            # Sets give O(1) de-duplication; only their sizes are reported.
            corporate_ids = set()
            company_ids = set()
            stock_corporates = set()
            nonstock_corporates = set()
            domestic_corporates = set()
            foreign_corporates = set()

            for funding in fundings:
                if funding is None or funding["corporateId"] is None:
                    continue
                corporate_id = funding["corporateId"]

                if corporate_id not in lookup_cache:
                    corporate = conn.get(
                        "select * from corporate where id=%s and (active is null or active !='N')",
                        corporate_id)
                    cs = conn.query(
                        "select id from company where corporateId=%s and (active is null or active!='N')",
                        corporate_id)
                    lookup_cache[corporate_id] = (
                        corporate, [c["id"] for c in cs])
                corporate, corporate_company_ids = lookup_cache[corporate_id]

                if corporate is None or not corporate_company_ids:
                    continue

                cnt_event += 1
                if funding["round"] in stock_rounds:
                    cnt_event_stock += 1
                    stock_corporates.add(corporate_id)
                else:
                    cnt_event_nestock += 1
                    nonstock_corporates.add(corporate_id)

                corporate_ids.add(corporate_id)

                location_id = corporate["locationId"]
                if location_id is not None and location_id > 370:
                    cnt_f += 1
                    foreign_corporates.add(corporate_id)
                else:
                    cnt_d += 1
                    domestic_corporates.add(corporate_id)

                company_ids.update(corporate_company_ids)

            de.extend([
                {"item": year + "披露总融资事件数", "count": cnt_event},
                {"item": year + "披露总上市融资事件数", "count": cnt_event_stock},
                {"item": year + "披露总非上市融资事件数", "count": cnt_event_nestock},
                {"item": year + "披露总上市融资corporate数", "count": len(stock_corporates)},
                {"item": year + "披露总非上市融资corporate数", "count": len(nonstock_corporates)},
                {"item": year + "披露总涉及项目数", "count": len(company_ids)},
                {"item": year + "披露总涉及corporate数", "count": len(corporate_ids)},
                {"item": year + "披露国内融资事件数", "count": cnt_d},
                {"item": year + "披露国外融资事件数", "count": cnt_f},
                {"item": year + "披露总国内涉及corporate数", "count": len(domestic_corporates)},
                {"item": year + "披露总国外涉及corporate数", "count": len(foreign_corporates)},
            ])
    finally:
        conn.close()
    return de
예제 #6
0
def _find_establish_time(collection_gongshang, name):
    """Return the ``establishTime`` of the gongshang document named *name*.

    Returns None when no document matches, when the document has no
    ``establishTime`` key, or when the stored value itself is None.
    """
    gongshang = collection_gongshang.find_one({"name": name})
    if gongshang is None:
        return None
    return gongshang.get("establishTime")


def patch_company_establish_date(company_id):
    """Refresh ``establishDate`` and ``round`` on a company's corporate.

    The establish date is read from the ``info.gongshang`` Mongo collection,
    first by the corporate's ``fullName`` and then by each active corporate
    alias until a value is found.  The corporate's ``round`` is set to the
    round of its most recent active funding, or to -1 when it has no active
    funding but a round is currently stored.

    Nothing is done when the company does not exist or has no corporate.
    A failure while writing ``establishDate`` is logged and does not prevent
    the round from being patched.  Both connections are always closed.

    Args:
        company_id: id of the row in the ``company`` table.
    """
    conn = db.connect_torndb()
    try:
        mongo = db.connect_mongo()
        try:
            collection_gongshang = mongo.info.gongshang
            company1 = conn.get("select * from company where id=%s",
                                company_id)
            if company1 is None:
                logger.info("Company: %s not found", company_id)
                return
            corporate_id = company1["corporateId"]
            if corporate_id is None:
                return

            corporate = conn.get("select * from corporate where id=%s",
                                 corporate_id)

            establish_date = None
            if corporate is not None and corporate["fullName"] is not None:
                establish_date = _find_establish_time(
                    collection_gongshang, corporate["fullName"])

            if establish_date is None:
                # Fall back to the aliases; the first one with a value wins.
                aliases = conn.query(
                    "select * from corporate_alias where "
                    "(active is null or active !='N') and corporateId=%s",
                    corporate_id)
                for alias in aliases:
                    establish_date = _find_establish_time(
                        collection_gongshang, alias["name"])
                    if establish_date is not None:
                        break

            if establish_date is not None:
                logger.info("Company: %s establishDate: %s", company_id,
                            establish_date)
                try:
                    conn.update(
                        "update corporate set establishDate=%s where id=%s",
                        establish_date, corporate_id)
                except Exception:
                    # Best effort: keep going, but leave a trace of the failure.
                    logger.exception(
                        "Failed to update establishDate of corporate %s",
                        corporate_id)

            # patch round
            if corporate is not None:
                funding = conn.get(
                    "select * from funding where corporateId=%s and (active is null or active !='N') "
                    "order by fundingDate desc limit 1", corporate["id"])
                if funding is not None:
                    conn.update("update corporate set round=%s where id=%s",
                                funding["round"], corporate["id"])
                elif corporate["round"] is not None:
                    conn.update("update corporate set round=-1 where id=%s",
                                corporate["id"])
        finally:
            mongo.close()
    finally:
        conn.close()