def listingMonthlyStatistics(listingId,statDate):
    db = dbSettings.db_connect()
    cursor = db.cursor()
    start = time.time()
    cursor.execute(sql_listingMonthlyStatistics(listingId,statDate))
    item = cursor.fetchall()[0]

    monthlyPurchasePrice = item['monthlyPurchasePrice']
    monthlySumPrice = item['monthlySumPrice']
    rentRate        = item['rentRate']
    if monthlyPurchasePrice == None :
      monthlySumPrice = 0
      rentRate = 0
      monthlyPurchasePrice = 0
    cursor.execute(sql_listingPriceMonthlyStatistics(listingId,statDate))
    item = cursor.fetchall()[0]
    print(item)
    monthlyAvgPrice = item['monthlyAvgPrice'] if not item['monthlyAvgPrice'] == None else 0


    end = time.time()
    # print(1000*(end-start),"sql_listingMonthlyStatistics")
    sm.release()
    sql = """
    INSERT INTO `airbnb_scrapy_us`.`monthlystat_us`
     (`dimension`,`listingId`,`year`,`month`,`monthlyAvgPrice`,`monthlyPurchasePrice`,`monthlySumPrice`,`rentRate`,`repeat_flag`) 
     VALUES ('listing','{}','{}','{}'       ,'{}'             ,'{}'                  ,'{}'             ,'{}'      ,'{}')
     """.format(str(listingId),statDate[2:4],statDate[5:7],monthlyAvgPrice,monthlyPurchasePrice,monthlySumPrice,rentRate,statDate[2:4]+"-"+statDate[5:7]+"-"+str(listingId))
    # print(sql)
    cursor.execute(sql)
    db.commit()
    return {'listingId':listingId,'monthlyAvgPrice':monthlyAvgPrice,'monthlySumPrice':monthlySumPrice,'rentRate':rentRate}
Exemple #2
0
 def __init__(self):
     self.table = "`proxypool`"
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()
     self.mapTable = "`map`"
     self.listTable = "`houselist`"
     self.mapresponseTable = "`mapresponse`"
Exemple #3
0
def parseStart(bias):
    # parse = calendarParse()
    # parse.getItem(bias*10)
    # sm.release()
    # return
    try:
        parse = calendarParse()
        parse.getItem(bias * 10)
    except Exception as e:
        print(e)
        try:
            parse = calendarParse()
            parse.getItem(bias * 10)
        except:
            try:
                parse = calendarParse()
                parse.getItem(bias * 10)
            except Exception as e:
                print(e)
                print("err in {}".format(bias))

                pass

    sm.release()
    db = dbSettings.db_connect()
    cursor = db.cursor()
    sql = "INSERT INTO `calendarparselog` ( `type`, `infor`)\
            VALUES ('{}','{}');".format("start parse", bias * 10)
    print(sql)
    cursor.execute(sql)
    db.commit()
def detailParse():




if __name__ == "__main__":
    # f_in = open( 'src.json', 'r',encoding = 'utf-8' )
    # f_out = open( 'tgt.json', 'w',encoding = 'utf-8' )

    # jsonData =  json.loads(f_in.read())

    db = dbSettings.db_connect()
    cursor = db.cursor()

    startResponseId = 0
    endResponseId = getMaxNumOfDetailResponse(db,cursor)

    landmark = set()
    amenity = set()

    for responseId in range(startResponseId, endResponseId+1,1000):
        landmark,amenity = detailParse(responseId,landmark,amenity)



    decode = decodeDetail()
    meta = decode.decode(jsonData)

    landmark |= set(meta['landmark'])
    amenity |= set(meta['amenity'])



    pprint(landmark,amenity)
def batchGeoEncode(bias, existLocation):
    db = dbSettings.db_connect()
    cursor = db.cursor()
    cursor.execute(
        "SELECT id,listingid,lat,lng FROM `detail` where id between {} and {}".
        format(bias, bias + 100))
    print(
        "SELECT id,listingid,lat,lng FROM `detail` where id between {} and {}".
        format(bias, bias + 100))
    results = cursor.fetchall()

    sql = "INSERT INTO `listing_location`(`listingid`, `lng`, `lat`, `formatted_address`, `province`, `city`, `district`, `township`)\
         VALUE (%s,%s,%s,%s,%s,%s,%s,%s)"

    vals = []
    count = 0
    for row in results:
        # count += 1
        # print(count)
        if row["listingid"] in existLocation:
            continue
        print(row["lng"], ",", row["lat"])
        addressComponent = decodeLocation(row["lng"], row["lat"])
        vals.append(
            (row["listingid"], row["lng"], row["lat"],
             addressComponent["formatted_address"],
             addressComponent["province"], addressComponent["city"],
             addressComponent["district"], addressComponent["township"]))

        # print(row["listingid"],row["lng"],row["lat"],addressComponent["formatted_address"],addressComponent["province"],addressComponent["city"],addressComponent["district"],addressComponent["township"])

    cursor.executemany(sql, vals)
    db.commit()
Exemple #6
0
def dbInsert(listingId, Lng, Lat, locationInfor):
    db = dbSettings.db_connect()
    cursor = db.cursor()
    if locationInfor == None:
        sql = '''
    INSERT INTO `airbnb_scrapy_us`.`listing_location_us` 
    (`listingid`,`lng`,`lat`,`country`) 
    VALUES ('{}',{},{},'unknown')
    '''.format(listingId, Lng, Lat)
    else:
        sql = '''
    INSERT INTO `airbnb_scrapy_us`.`listing_location_us` 
    (`listingid`,`lng`,`lat`,`country`,`state`,`county`,`postcode`,`town`,`formatted_address`) 
    VALUES ('{}',{},{},'{}','{}','{}','{}','{}','{}');
    '''.format(listingId, Lng, Lat, locationInfor['country'],
               locationInfor['state'], locationInfor['county'],
               locationInfor['postcode'], locationInfor['town'],
               locationInfor['display_name'])
    # print(sql)
    try:
        cursor.execute(sql)
        db.commit()
    except Exception as e:
        print(e.args)
        print(sql)
Exemple #7
0
 def __init__(self):
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()
     self.listTable = "`houselist`"
     self.calendarResponseTable = "`calendarresponse`"
     self.redis = redis.Redis.from_url(REDIS_URL)
     print(REDIS_URL)
 def __init__(self):
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()
     self.listTable = "`houselist`"
     self.detailTable = "`detail`"
     self.redis = redis.Redis.from_url(REDIS_URL)
     print(REDIS_URL)
 def __init__(self):
     self.table = "`proxypool`"
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()
     self.redis = redis.Redis.from_url(REDIS_URL)
     self.mapTable = "`map`"
     self.listTable = "`houselist`"
     self.mapresponseTable = "`mapresponse`"
Exemple #10
0
 def __init__(self):
     self.proxyId = 0
     self.ip = ""
     self.table = "`proxypool_us`"
     # print("middleline-proxyPool1")
     self.db = dbSettings.db_connect()
     # print("middline-proxyPool2")
     self.cursor = self.db.cursor()
 def __init__(self):
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()
     self.listTable = "`houselist`"
     self.responseTable = "calendarresponse"
     self.responseLogTable = "calendarparselog"
     self.redis = redis.Redis.from_url(REDIS_URL)
     print(time.asctime(time.localtime(time.time())))
def getListingId():
    db = dbSettings.db_connect()
    cursor = db.cursor()

    sql = "SELECT house_id FROM `houselist_us` ORDER BY RAND() LIMIT 30000"
    cursor.execute(sql)
    db.commit()
    result = cursor.fetchall()
    return [i['house_id'] for i in result]
def dbInsertparselog(type, responseId, infor):
    # 数据库链接
    db = dbSettings.db_connect()
    cursor = db.cursor()

    sql = "INSERT IGNORE INTO `calendarparselog` (`type`, `response_id`, `infor`)\
           VALUES ('{}',{},'{}');".format(type, responseId, infor)
    print(sql)
    cursor.execute(sql)
    db.commit()
def getIp():
    db = dbSettings.db_connect()
    cursor = db.cursor()
    while(1):
        print("test ip pool")
        time.sleep(0.5)
        sql = "SELECT * from `proxypool` WHERE `state` != 'del'"
        cursor.execute(sql)
        db.commit()
        results = cursor.fetchall()
        if(len(results) < 10):
            proxypool = proxyPool()
            proxies = proxypool.get(num=10)
def getIp():
    db = dbSettings.db_connect()
    cursor = db.cursor()
    timeNow = datetime.datetime.now()
    while (1):
        print("test ip pool")
        time.sleep(1)
        sql = "SELECT * from `proxypool_us` WHERE `state` != 'del'"
        cursor.execute(sql)
        db.commit()
        results = cursor.fetchall()
        if (len(results) < 10):
            proxypool = proxyPool()
            proxies = proxypool.get(num=10)
        for row in results:
            deltaTime = (timeNow - row['init_time']).seconds
    def __init__(self):
        self.table = "`proxypool`"
        self.db = dbSettings.db_connect()
        self.cursor = self.db.cursor()
        self.mapTable = "`map`"
        self.listTable = "`houselist`"
        self.mapresponseTable = "`mapresponse`"
        self.calendarresponseTable = "`calendarresponse`"

        localtime = time.localtime(time.time())
        self.mouth = localtime[1]
        self.year = localtime[0]
        self.day = localtime[2]
        self.dtToday = "{}-{}-{}".format(self.year, self.mouth, self.day)

        self.orderList = []
        self.priceList = []
        self.house_id = 0
def dbInsert(listingId, Lng, Lat, locationInfor):
    db = dbSettings.db_connect()
    cursor = db.cursor()
    if locationInfor == None:
        sql = '''
    INSERT INTO `airbnb_scrapy_us`.`listing_location_us` 
    (`listingid`,`lng`,`lat`,`country`) 
    VALUES ('{}',{},{},'abroad')
    '''.format(listingId, Lng, Lat)
    else:
        sql = '''
    INSERT INTO `airbnb_scrapy_us`.`listing_location_us` 
    (`listingid`,`lng`,`lat`,`country`,`state`,`city`,`MSACode`,`MSAName`) 
    VALUES ('{}',{},{},'{}','{}','{}','{}','{}');
    '''.format(listingId, Lng, Lat, "US", locationInfor['sStateName'],
               locationInfor['sCountyName'], locationInfor['sMSACode'],
               locationInfor['sMSAName'])
    cursor.execute(sql)
    db.commit()
def areaMonthlyStatistics(areaRow,statDate,dimension):
    db = dbSettings.db_connect()
    cursor = db.cursor()
    # print(sql_areaMonthlyStatistics(areaRow,statDate))
    cursor.execute(sql_areaMonthlyStatistics(areaRow,statDate,dimension))
    print(areaRow)
    result = cursor.fetchall()[0]
    print(result)
    if result['monthlySumPrice'] == None:
      return
    statResult = result

    cursor.execute(sql_areaFeatureStatistics(areaRow,statDate,dimension))
    print(areaRow)
    result = cursor.fetchall()[0]
    FeatureResult = {}

    rentalType = {"Entire Home":0,"Private Room":0,"Shared Room":0}
    reviewSummary = {"reviewCount":0,"reviewSummary_wei_zhi_bian_li":0,"reviewSummary_ru_zhu_bian_jie":0,"reviewSummary_ru_shi_miao_shu":0,"reviewSummary_gan_jing_wei_sheng":0,"reviewSummary_gou_tong_shun_chang":0,"reviewSummary_gao_xing_jia_bi":0}
    for row in result :
      # rentalType
      if "整套" in row["propertyType"]:
        rentalType["Entire Home"] += 1
      if "独立" in row["propertyType"]:
        rentalType["Private Room"] += 1
      if "合住" in row["propertyType"]:
        rentalType["Shared Room"] += 1

      # reviewSummary
      for item in ["reviewSummary_wei_zhi_bian_li","reviewSummary_ru_zhu_bian_jie","reviewSummary_ru_shi_miao_shu","reviewSummary_gan_jing_wei_sheng","reviewSummary_gou_tong_shun_chang","reviewSummary_gao_xing_jia_bi"]:
        reviewSummary[item] += row[item]
      reviewSummary["reviewCount"] += 1

    for item in ["reviewSummary_wei_zhi_bian_li","reviewSummary_ru_zhu_bian_jie","reviewSummary_ru_shi_miao_shu","reviewSummary_gan_jing_wei_sheng","reviewSummary_gou_tong_shun_chang","reviewSummary_gao_xing_jia_bi"]:
        reviewSummary[item] = reviewSummary[item]/reviewSummary["reviewCount"]



    sql = "INSERT INTO `airbnb_scrapy_us`.`monthlystat_us` (`dimension`,`province` , `city`, `district`,`year`,`month`,`monthlyAvgPrice`,"+\
      "`monthlySumPrice`,`rentRate`,`repeat_flag`,`monthlyPurchasePrice`) VALUES ('{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}')".format(dimension,areaRow['province'],areaRow['city'],areaRow['district'],statDate[2:4],statDate[5:7],result['monthlyAvgPrice'],result['monthlySumPrice'],result['rentRate'],dimension+"-"+statDate[2:4]+"-"+statDate[5:7]+"-"+areaRow['province']+areaRow['city']+areaRow['district'],result['monthlyPurchasePrice'])
    cursor.execute(sql)
    db.commit()
Exemple #19
0
def getIp():
    db = dbSettings.db_connect()
    cursor = db.cursor()
    timeNow = datetime.datetime.now()
    proxypool = proxyPool()
    while (1):
        time.sleep(1)
        sql = "SELECT * from `proxypool` WHERE `state` != 'del'"
        cursor.execute(sql)
        db.commit()
        results = cursor.fetchall()
        n_ip_kuaidaili = proxypool.checkBalance()
        if n_ip_kuaidaili == 0:
            SMTP("*****@*****.**", "发信内容", "快代理ip数耗尽警告", "DaduosuMonitor")
            time.sleep(180)
        print("test ip pool ,n_ip_proxypool: {} , n_ip_kuaidaili: {}".format(
            len(results), n_ip_kuaidaili))
        if (len(results) < 5):
            proxies = proxypool.get(num=3)
        for row in results:
            deltaTime = (timeNow - row['init_time']).seconds
Exemple #20
0
 def __init__(self):
     self.table = "`proxypool_us`"
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()
     self.redis = redis.Redis.from_url(REDIS_URL)
 def __init__(self):
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()
     self.detailresponseTable = "`detailresponse`"
Exemple #22
0
    cursor.execute(sql)
    db.commit()
    return {
        'listingId': listingId,
        'monthlyAvgPrice': monthlyAvgPrice,
        'monthlySumPrice': monthlySumPrice,
        'rentRate': rentRate
    }


sm = threading.Semaphore(30)

if __name__ == "__main__":
    statDate = '2021-03-01'

    db = dbSettings.db_connect()
    cursor = db.cursor()

    # YEAR & MONTH
    cursor.execute("SELECT YEAR('{}') as `YEAR`".format(statDate))
    YEAR = cursor.fetchall()[0]['YEAR']

    cursor.execute("SELECT MONTH('{}') as `MONTH`".format(statDate))
    MONTH = cursor.fetchall()[0]['MONTH']
    print(YEAR, MONTH)

    ########## listing start ##########
    # cursor.execute("SELECT `house_id` FROM houselist")
    # results = cursor.fetchall()
    # start = time.time()
    # count = 0
def dbUtils(sql):
    db = dbSettings.db_connect()
    cursor = db.cursor()
    cursor.execute(sql)
    db.commit()
    return cursor.fetchall()
Exemple #24
0
def areaMonthlyStatistics(areaRow, statDate, dimension):
    db = dbSettings.db_connect()
    cursor = db.cursor()
    # print(sql_areaMonthlyStatistics(areaRow,statDate))
    cursor.execute(sql_areaMonthlyStatistics(areaRow, statDate, dimension))
    print(areaRow)
    result = cursor.fetchall()[0]
    print(result)
    if result['monthlySumPrice'] == None:
        return

    statResult = result

    cursor.execute(sql_areaFeatureStatistics(areaRow, statDate, dimension))
    print(areaRow)
    result = cursor.fetchall()
    FeatureResult = {}

    rentalType = {"Entire Home": 0, "Private Room": 0, "Shared Room": 0}
    airbnbRating = {
        "reviewCount": 0,
        "reviewSummary_wei_zhi_bian_li": 0,
        "reviewSummary_ru_zhu_bian_jie": 0,
        "reviewSummary_ru_shi_miao_shu": 0,
        "reviewSummary_gan_jing_wei_sheng": 0,
        "reviewSummary_gou_tong_shun_chang": 0,
        "reviewSummary_gao_xing_jia_bi": 0
    }
    rentalSize = {}
    amenity = {}
    # rentalSize init
    for count in [1, 2, 3, 4, 5]:
        rentalSize["roomCount_" + str(count)] = 0
    rentalSize["roomCount_5+"] = 0

    # amenity init
    for k, v in result[0].items():
        # print(k[:6])
        if k[:7] == "amenity":
            amenity[k] = 0

    for row in result:
        # print(row)
        # rentalType
        if "整套" in row["propertyType"]:
            rentalType["Entire Home"] += 1
        if "独立" in row["propertyType"]:
            rentalType["Private Room"] += 1
        if "合住" in row["propertyType"]:
            rentalType["Shared Room"] += 1

        # airbnbRating
        for item in [
                "reviewSummary_wei_zhi_bian_li",
                "reviewSummary_ru_zhu_bian_jie",
                "reviewSummary_ru_shi_miao_shu",
                "reviewSummary_gan_jing_wei_sheng",
                "reviewSummary_gou_tong_shun_chang",
                "reviewSummary_gao_xing_jia_bi"
        ]:
            airbnbRating[item] += row[item]
        if row["reviewSummary_wei_zhi_bian_li"] > 0:
            airbnbRating["reviewCount"] += 1
        # print(airbnbRating["reviewSummary_wei_zhi_bian_li"])

        # rentalSize
        rowRoom = row["chinaTitleDetails_ROOM"]
        if rowRoom == None:
            rowRoom = ""
        if "卧室" in rowRoom:
            for count in [1, 2, 3, 4, 5]:
                if str(count) in rowRoom:
                    rentalSize["roomCount_" + str(count)] += 1
            for num in range(6, 20):
                if str(num) in rowRoom:
                    rentalSize["roomCount_5+"] += 1

        for k, v in row.items():
            if k[:7] == "amenity":
                amenity[k] += row[k]

    for item in [
            "reviewSummary_wei_zhi_bian_li", "reviewSummary_ru_zhu_bian_jie",
            "reviewSummary_ru_shi_miao_shu",
            "reviewSummary_gan_jing_wei_sheng",
            "reviewSummary_gou_tong_shun_chang",
            "reviewSummary_gao_xing_jia_bi"
    ]:
        if airbnbRating["reviewCount"] > 0:
            airbnbRating[
                item] = airbnbRating[item] / airbnbRating["reviewCount"]

    pprint(rentalType)
    # pprint(airbnbRating)
    # pprint(rentalSize)
    # pprint(amenity)
    result = statResult

    sql = "INSERT INTO `airbnb_scrapy`.`monthlystat` (`dimension`,`province` , `city`, `district`,`year`,`month`,`monthlyAvgPrice`,"+\
      "`monthlySumPrice`,`rentRate`,`repeat_flag`,`monthlyPurchasePrice`,`rentalType`,`airbnbRating`,`rentalSize`,`amenity`) "+\
        "VALUES ('{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}','{}')".format(dimension,areaRow['province'],areaRow['city'],areaRow['district'],statDate[2:4],statDate[5:7],result['monthlyAvgPrice'],result['monthlySumPrice'],result['rentRate'],dimension+"-"+statDate[2:4]+"-"+statDate[5:7]+"-"+areaRow['province']+areaRow['city']+areaRow['district'],result['monthlyPurchasePrice'],json.dumps(rentalType),json.dumps(airbnbRating),json.dumps(rentalSize),json.dumps(amenity))
    # print(sql)
    cursor.execute(sql)
    db.commit()
 def __init__(self):
     self.proxyId = 0
     self.ip = ""
     self.table = "`proxypool_us`"
     self.db = dbSettings.db_connect()
     self.cursor = self.db.cursor()