コード例 #1
0
    def nextPageNegativeOneAllPage(self, response, url):
      """Build the follow-up page URLs for a listing page.

      The "next page" label is read through ``self.xpath['nextPageText']``
      and, only when that one does not read '下一页', through
      ``self.xpath['nextPageText2']``.  As soon as one label matches, the
      first result of ``self.xpath['allPage']`` is taken as the URL that
      carries the highest page number; its last non-empty path segment is
      parsed with ``util.String2Number``.

      Args:
        response: object exposing ``xpath(query).extract()``.
        url: prefix to which ``'g' + <page number>`` is appended.

      Returns:
        URLs for pages 2..max in ascending order, or max..2 in descending
        order when ``self.reversed`` is truthy.  An empty list when no
        label matched or no URL was found.
      """
      maxURL = ''
      for labelKey in ('nextPageText', 'nextPageText2'):
        nextPageText = ''.join(response.xpath(self.xpath[labelKey]).extract()).strip()
        if nextPageText == '下一页':
          candidates = response.xpath(self.xpath['allPage']).extract()
          if candidates:
            maxURL = candidates[0]
          # A matching label ends the search even if no URL was found.
          break

      # Compare by value: identity checks against a string literal are
      # implementation-dependent and raise a SyntaxWarning on Python 3.8+.
      if not maxURL:
        return []

      # A trailing '/' leaves an empty last segment; use the one before it.
      segments = maxURL.split('/')
      lastSegment = segments[-2] if segments[-1] == '' else segments[-1]
      maxNumber = int(util.String2Number(lastSegment))

      if self.reversed:
        pageNumbers = range(maxNumber, 1, -1)
      else:
        pageNumbers = range(2, maxNumber + 1)
      return [url + 'g' + str(i) for i in pageNumbers]
コード例 #2
0
ファイル: dv3.py プロジェクト: reinhardtken/py-code
 def processSong(self, value):
     """Extract the bonus-share amount from an allocation-plan string.

     The marker '送' is preferred; '转' is only used when '送' is absent.
     The text that follows the chosen marker is handed to
     util.String2Number and that result is returned.

     Args:
         value: plan text, e.g. '10送3.00派4.00元(含税,扣税后3.30元)'.

     Returns:
         The number parsed after the marker, or 0 when neither marker
         occurs in the text.
     """
     for marker in ('送', '转'):
         position = value.find(marker)
         if position != -1:
             return util.String2Number(value[position + 1:])
     return 0
コード例 #3
0
ファイル: dv3.py プロジェクト: reinhardtken/py-code
    def CalcDividend(self, year, position):
        """Derive per-share payout figures for one checkpoint entry.

        Works on ``self.checkPoint[year][position]``.  When that entry has no
        allocation-plan text nothing happens.  Otherwise the plan text
        (e.g. '10送3.00派4.00元(含税,扣税后3.30元)') is parsed and three values
        are written back to the entry, each divided by the number that
        util.String2Number extracts from the plan text:

        * 'dividend'           from self.processPai
        * 'dividend_aftertax'  from self.processPai2
        * 'gift'               from self.processSong

        If the entry also has the 'CQCXR' field (presumably the ex-rights
        date; confirm against const.GPFH_KEYWORD), a DividendPoint is
        appended to self.dividendPoint.

        Args:
            year: key into self.checkPoint; also used for fallback dates.
            position: key below the year; 'midYear' picks the mid-year
                fallback date.
        """
        planKey = const.GPFH_KEYWORD.KEY_NAME['AllocationPlan']
        entry = self.checkPoint[year][position]
        if planKey not in entry:
            return

        planText = entry[planKey]
        base = util.String2Number(planText)
        entry['dividend'] = self.processPai(planText) / base
        entry['dividend_aftertax'] = self.processPai2(planText) / base
        entry['gift'] = self.processSong(planText) / base

        dateKey = const.GPFH_KEYWORD.KEY_NAME['CQCXR']
        if dateKey not in entry:
            return

        rawDate = entry[dateKey]
        # Original author's note: Agricultural Bank of China, 2010 (a record
        # where the date field is just '-').
        if rawDate == '-':
            if position == 'midYear':
                # Mid-year payouts default to ex-rights at the end of that year.
                fallback = datetime(year, 12, 1)
            else:
                fallback = datetime(year + 1, 6, 30)
            when = pd.Timestamp(fallback)
        else:
            when = pd.to_datetime(np.datetime64(rawDate))

        self.dividendPoint.append(
            DividendPoint(when, entry['dividend'], entry['dividend_aftertax'],
                          entry['gift'], year, position))
コード例 #4
0
    def Run(codes):
        """Count, per stock, the quarters whose 'sjltz' value is below -10.

        For every element of ``codes`` the frame returned by
        ``util.LoadData('stock', 'yjbg2-' + one['_id'], ...)`` is walked row
        by row.  Rows whose 'sjltz' value parses to NaN are ignored.  Two
        tallies are kept: one over all remaining rows and one restricted to
        rows dated 2010 or later.  Each tally records the first and last
        quarter, the number of rows, the number of rows below -10 and their
        ratio (0 when there are no rows).

        All per-stock results are saved in one DataFrame through
        ``util.SaveMongoDB_DF(dfOut, 'stock_statistics2',
        'dangerousQuarterRatio')``.  A failure on one stock is reported with
        util.PrintException and that stock is skipped.

        Args:
            codes: iterable of mappings that each provide an '_id'.
        """
        out = []
        for one in codes:
            try:
                rowCount = 0
                hitCount = 0
                firstSeen = None
                lastSeen = None
                rowCount2010 = 0
                hitCount2010 = 0
                firstSeen2010 = None
                lastSeen2010 = None
                df = util.LoadData('stock',
                                   'yjbg2-' + one['_id'],
                                   condition={},
                                   sort=[('_id', 1)])
                for quarter, row in df.iterrows():
                    when = datetime.strptime(quarter, '%Y-%m-%d')
                    value = util.String2Number(row['sjltz'])
                    if np.isnan(value):
                        continue

                    since2010 = when.year >= 2010

                    if firstSeen is None:
                        firstSeen = when
                    lastSeen = when
                    rowCount += 1
                    if since2010:
                        if firstSeen2010 is None:
                            firstSeen2010 = when
                        lastSeen2010 = when
                        rowCount2010 += 1

                    if value < -10:
                        hitCount += 1
                        if since2010:
                            hitCount2010 += 1

                # Guard the ratios against stocks with no usable rows.
                percent = hitCount / rowCount if rowCount > 0 else 0
                percent2010 = hitCount2010 / rowCount2010 if rowCount2010 > 0 else 0
                out.append({
                    '_id': one['_id'],
                    'begin': firstSeen,
                    'end': lastSeen,
                    'base': rowCount,
                    'hit': hitCount,
                    'percent': percent,
                    'begin2010': firstSeen2010,
                    'end2010': lastSeen2010,
                    'base2010': rowCount2010,
                    'hit2010': hitCount2010,
                    'percent2010': percent2010
                })
            except Exception as e:
                util.PrintException(e)
        dfOut = pd.DataFrame(out)
        util.SaveMongoDB_DF(dfOut, 'stock_statistics2',
                            'dangerousQuarterRatio')
コード例 #5
0
ファイル: dv3.py プロジェクト: reinhardtken/py-code
 def processPai2(self, value):
     """Extract the after-tax cash payout from an allocation-plan string.

     Looks for the marker '扣税后' and hands the text that follows it to
     util.String2Number.

     Args:
         value: plan text, e.g. '10送3.00派4.00元(含税,扣税后3.30元)'.

     Returns:
         The number parsed after the marker, or 0 when the marker does not
         occur in the text.
     """
     marker = '扣税后'
     index = value.find(marker)
     if index == -1:
         return 0
     # Skip the whole marker, not just its first character, so the parser
     # receives only the text after '扣税后'.
     return util.String2Number(value[index + len(marker):])
コード例 #6
0
ファイル: zyShenzhen2.py プロジェクト: reinhardtken/py-code
    def parseOne(self, one):
      """Turn one listing node into an items.HouseItem.

      District, sub-district, title and id are read first; errors there
      propagate to the caller.  Prices, community, house type, area, level
      and crawl date are read inside a try block: on any exception the
      error is printed and logged, and the partially filled item is still
      returned.

      Args:
        one: selector for a single listing entry (must support
          ``xpath(path).extract()``).

      Returns:
        The populated items.HouseItem.
      """
      def joined(path):
        # Concatenate every string matched by `path` below this entry.
        return ''.join(one.xpath(path).extract())

      oneOut = items.HouseItem()
      oneOut['src'] = self.src
      oneOut['district'] = joined('./div[1]/p[3]/a[1]/text()').strip()
      oneOut['subDistrict'] = joined('./div[1]/p[3]/a[2]/text()').strip()
      oneOut['title'] = joined('./div[1]/h4/a/text()').strip()

      href = joined('./div[1]/h4/a/@href').strip()
      if href:
        houseId = '-1'
        try:
          # Last path segment minus its final five characters
          # (presumably a '.html' suffix; confirm against real links).
          houseId = href.split('/')[-1][:-5]
        except Exception as e:
          logging.warning("parseOne Exception %s" % (str(e)))
        oneOut['_id'] = houseId

      try:
        oneOut['totalPrice'] = util.String2Number(joined('./div[2]/p[1]/span/text()').strip())
        oneOut['unitPrice'] = util.String2Number(joined('./div[2]/p[2]/text()').strip())

        oneOut['community'] = joined('./div[1]/p[1]/a/text()')

        # When the second span only holds the '|' separator, the house type
        # is taken from the third span instead.
        oneOut['houseType'] = joined('./div[1]/p[1]/span[2]/text()').strip()
        if oneOut['houseType'] == '|':
          oneOut['houseType'] = joined('./div[1]/p[1]/span[3]/text()').strip()

        # Likewise the area falls back from the fourth to the fifth span
        # when the fourth does not parse to a number.
        oneOut['square'] = util.String2Number(joined('./div[1]/p[1]/span[4]/text()').strip())
        if np.isnan(oneOut['square']):
          oneOut['square'] = util.String2Number(joined('./div[1]/p[1]/span[5]/text()').strip())

        oneOut['level'] = joined('./div[1]/p[2]/span[1]/text()').strip()

        oneOut['crawlDate'] = util.today()

      except Exception as e:
        print(e)
        logging.warning("parseOne Exception %s"%(str(e)))
      return oneOut
コード例 #7
0
ファイル: zyShenzhen2.py プロジェクト: reinhardtken/py-code
    def nextPageNegativeOneAllPage(self, response, url):
      """Build the follow-up page URLs for a listing page.

      The text matched by ``self.xpath['allPage']`` is joined and stripped
      and treated as the URL that carries the highest page number; its last
      non-empty path segment is parsed with ``util.String2Number``.

      Args:
        response: object exposing ``xpath(query).extract()``.
        url: prefix to which ``'g' + <page number>`` is appended.

      Returns:
        URLs for pages 2..max in ascending order, or max..2 in descending
        order when ``self.reversed`` is truthy.  An empty list when nothing
        was matched.
      """
      maxURL = ''.join(response.xpath(self.xpath['allPage']).extract()).strip()

      # Compare by value: identity checks against a string literal are
      # implementation-dependent and raise a SyntaxWarning on Python 3.8+.
      if not maxURL:
        return []

      # A trailing '/' leaves an empty last segment; use the one before it.
      segments = maxURL.split('/')
      lastSegment = segments[-2] if segments[-1] == '' else segments[-1]
      maxNumber = int(util.String2Number(lastSegment))

      if self.reversed:
        pageNumbers = range(maxNumber, 1, -1)
      else:
        pageNumbers = range(2, maxNumber + 1)
      return [url + 'g' + str(i) for i in pageNumbers]
コード例 #8
0
ファイル: ljShanghai.py プロジェクト: reinhardtken/py-code
    def nextPageNegativeOne(self, response, url):
        """Build the follow-up page URLs for a listing page.

        When the label found through ``self.xpath['nextPageText']`` reads
        '下一页', the page-count URL is taken from ``self.xpath['allPage2']``;
        otherwise from ``self.xpath['nextPage']``.  The first match is
        stripped and its last non-empty path segment is parsed with
        ``util.String2Number``.

        Args:
            response: object exposing ``xpath(query).extract()``.
            url: prefix to which ``'pg' + <page number>`` is appended.

        Returns:
            URLs for pages 2..max, or an empty list when the chosen query
            matched nothing.
        """
        nextPageText = ''.join(
            response.xpath(self.xpath['nextPageText']).extract()).strip()
        sourceKey = 'allPage2' if nextPageText == '下一页' else 'nextPage'

        candidates = response.xpath(self.xpath[sourceKey]).extract()
        if not len(candidates):
            return []

        # A trailing '/' leaves an empty last segment; use the one before it.
        segments = candidates[0].strip().split('/')
        if segments[-1] == '':
            maxNumber = util.String2Number(segments[-2])
        else:
            maxNumber = util.String2Number(segments[-1])

        return [url + 'pg' + str(i) for i in range(2, int(maxNumber) + 1)]
コード例 #9
0
    def parseOne(self, one, district, subDistrict):
      """Turn one listing node into an items.HouseItem.

      Title and id are read first; errors there propagate to the caller.
      Prices, community, the '|'-separated info line and the crawl date are
      read inside a try block: on any exception the error is printed and
      logged, and the partially filled item is still returned.

      Args:
        one: selector for a single listing entry (must support
          ``xpath(path).extract()``).
        district: value stored as the item's 'district'.
        subDistrict: value stored as the item's 'subDistrict'.

      Returns:
        The populated items.HouseItem.
      """
      def joined(path):
        # Concatenate every string matched by `path` below this entry.
        return ''.join(one.xpath(path).extract())

      oneOut = items.HouseItem()
      oneOut['src'] = self.src
      oneOut['district'] = district
      oneOut['subDistrict'] = subDistrict
      oneOut['title'] = joined('./div/div[2]/div[1]/a/text()').strip()

      href = joined('./div/div[2]/div[1]/a/@href').strip()
      if href:
        houseId = '-1'
        try:
          # Last path segment minus its final five characters
          # (presumably a '.html' suffix; confirm against real links).
          houseId = href.split('/')[-1][:-5]
        except Exception as e:
          logging.warning("parseOne Exception %s" % (str(e)))
        oneOut['_id'] = houseId

      try:
        oneOut['totalPrice'] = util.String2Number(joined('./div/div[3]/h3/span/text()').strip())
        oneOut['unitPrice'] = util.String2Number(joined('./div/div[3]/p/text()').strip())

        oneOut['community'] = joined('./div/div[2]/div[2]/a/text()')

        # Info line fields by position: 0 = house type, 1 = area,
        # 3 and 4 are combined into the level.
        parts = joined('./div/div[2]/div[2]/text()').strip().split('|')
        if len(parts) > 0:
          oneOut['houseType'] = parts[0]
        if len(parts) > 1:
          oneOut['square'] = util.String2Number(parts[1])
        if len(parts) > 4:
          oneOut['level'] = parts[3] + '-' + parts[4]

        oneOut['crawlDate'] = util.today()

      except Exception as e:
        print(e)
        logging.warning("parseOne Exception %s"%(str(e)))
      return oneOut
コード例 #10
0
ファイル: trend.py プロジェクト: reinhardtken/backtest-py
    def Run(codes):
        """Compute a running trend score per stock from the 'sjltz' column.

        Two running totals are produced: one over every usable report and
        one that only starts accumulating with reports dated 2010 or later.

        For each code the frame returned by
        ``util.LoadData('stock', 'yjbg-' + one, ...)`` is walked row by row.
        Rows whose 'sjltz' value parses to NaN are ignored.  Every other
        row scores -1 (value below 0), -0.5 (value lower than the previous
        usable value) or 1, and the score is added to the running totals.
        The rows are saved per code through
        ``util.SaveMongoDB_DF(dfOut, 'stock_statistics', one)``.  A failure
        on one code is printed and that code is skipped.

        Args:
            codes: iterable of code strings.
        """
        for one in codes:
            try:
                out = []
                previousValue = 0
                runningTrend = 0
                runningTrend2010 = 0
                df = util.LoadData('stock',
                                   'yjbg-' + one,
                                   condition={},
                                   sort=[('_id', 1)])
                for quarter, row in df.iterrows():
                    when = datetime.strptime(quarter, '%Y-%m-%d')
                    value = util.String2Number(row['sjltz'])
                    if np.isnan(value):
                        continue

                    # Original author's rationale: this is not too strict.
                    # Even a company whose business has stopped growing
                    # should still grow in nominal terms once inflation is
                    # taken into account.
                    if value < 0:
                        trend = -1
                    elif value - previousValue < 0:
                        trend = -0.5
                    else:
                        trend = 1
                    previousValue = value

                    runningTrend = runningTrend + trend
                    record = {
                        '_id': when,
                        'nowPMT': trend,
                        'continuityPMT': runningTrend
                    }
                    # Rows before 2010 carry no 'continuityPMTFrom2010' key.
                    if when.year >= 2010:
                        runningTrend2010 = runningTrend2010 + trend
                        record.update({
                            'continuityPMTFrom2010':
                            runningTrend2010
                        })
                    out.append(record)

                dfOut = pd.DataFrame(out)
                util.SaveMongoDB_DF(dfOut, 'stock_statistics', one)
            except Exception as e:
                print(e)
コード例 #11
0
ファイル: zyDigest.py プロジェクト: reinhardtken/py-code
 def parseUpDown(self, response):
     """Find the first up/down percentage shown on the page.

     The 'up' span is checked before the 'down' span.  A span counts when
     its stripped text parses (util.String2Number) to something other than
     NaN.

     Args:
         response: object exposing ``xpath(query)`` whose result in turn
             supports ``xpath(query).extract()``.

     Returns:
         A tuple ``('up', value)`` or ``('down', value)``, or None when no
         span yields a number.
     """
     containerPaths = (
         '/html/body/div[@class="portraitLayer "]/div/p',
         # Disabled alternative kept from the original:
         # '/html/body/div[@class="portraitLayer tiphide openHotTip"]/div/p',
     )
     spanPaths = (
         ('up', './span[@class="up"]/text()'),
         ('down', './span[@class="down"]/text()'),
     )
     for containerPath in containerPaths:
         node = response.xpath(containerPath)
         for label, spanPath in spanPaths:
             rawText = ''.join(node.xpath(spanPath).extract()).strip()
             percent = util.String2Number(rawText)
             if not np.isnan(percent):
                 return (label, percent)
     return None
コード例 #12
0
    def parse(self, response):
      """Crawl callback whose work depends on ``response.meta['step']``.

      As produced by this method, requests carry step 0 for district pages,
      step 1 for sub-district pages (together with their 'url') and step 2
      for further result pages (together with 'district' and
      'subDistrict').

      * no step or step 0: request every district link not yet received.
      * step 0 or 1: request every sub-district link not yet received.
      * step 1: read the district and sub-district names and the listing
        count, yield an items.HouseDetailDigest, then request the pages
        returned by self.nextPage.
      * step 2: take district and sub-district from the meta.
      * step 1 or higher: yield one parsed item per node matched by
        ``self.xpath['lists']``.

      Args:
        response: crawled page; its URL is added to self.received.

      Yields:
        Request objects, one HouseDetailDigest (step 1 only) and the items
        returned by self.parseOne.
      """
      self.received.add(response.url)
      meta = response.meta

      if 'step' not in meta or meta['step'] == 0:
        for link in set(self.parseDistricts(response)) - self.received:
          yield Request(link, meta={'step': 0})

      # Everything below requires a step; a response without one ends here.
      if 'step' not in meta:
        return

      if meta['step'] <= 1:
        for link in set(self.parseSubDistricts(response)) - self.received:
          yield Request(link, meta={'step': 1, 'url': link})

      district = ''
      subDistrict = ''

      if meta['step'] == 1:
        found = response.xpath(self.xpath['districtName']).extract()
        if len(found):
          district = found[0]

        found = response.xpath(self.xpath['subDistrictName']).extract()
        if len(found):
          subDistrict = found[0]

        countText = ''.join(response.xpath(self.xpath['districtNumber']).extract()).strip()
        number = util.String2Number(countText)

        digest = items.HouseDetailDigest()
        digest['city'] = self.city
        digest['src'] = self.src
        digest['district'] = district
        digest['subDistrict'] = subDistrict
        digest['number'] = number
        today = util.todayString()
        try:
          digest['_id'] = today + '_' + self.city + '_' + district + '_' + subDistrict
        except Exception as e:
          # Best effort: the digest is still yielded without an '_id'.
          print(e)
        yield digest

        nextPage = self.nextPage(response, self.head, meta['url'], number)
        for one in set(nextPage) - self.received:
          print('next url: %s %s %s'%(district, subDistrict, one))
          yield Request(one, meta={'step': 2, 'district': district, 'subDistrict': subDistrict})

      if meta['step'] == 2:
        district = meta['district']
        subDistrict = meta['subDistrict']

      if meta['step'] >= 1:
        for one in response.xpath(self.xpath['lists']):
          yield self.parseOne(one, district, subDistrict)
コード例 #13
0
ファイル: ljBeijing.py プロジェクト: reinhardtken/py-code
    def parseOne(self, one, district, subDistrict):
        """Turn one listing node into an items.HouseItem.

        Title and '_id' (the link's data-housecode attribute) are read
        first; errors there propagate to the caller.  Prices, community,
        house info, area, position info, follow info and crawl date are
        read inside a try block: on any exception the error is printed and
        logged, and the partially filled item is still returned.

        Args:
            one: selector for a single listing entry (must support
                ``xpath(path).extract()``).
            district: value stored as the item's 'district'.
            subDistrict: value stored as the item's 'subDistrict'.

        Returns:
            The populated items.HouseItem.
        """
        # Original author's note: sample of an item with mostly empty
        # fields (presumably a case that motivated the fallbacks below;
        # TODO confirm).
        # {'_id': '',
        #  'area': '',
        #  'attention': '',
        #  'community': '',
        #  'crawlDate': datetime.datetime(2019, 8, 24, 0, 0),
        #  'district': '朝阳',
        #  'level': ')',
        #  'src': 'lj',
        #  'subDistrict': '通州北苑',
        #  'title': '',
        #  'totalPrice': nan,
        #  'unitPrice': nan}
        def joined(path):
            # Concatenate every string matched by `path` below this entry.
            return ''.join(one.xpath(path).extract())

        oneOut = items.HouseItem()
        oneOut['src'] = self.src
        oneOut['district'] = district
        oneOut['subDistrict'] = subDistrict
        oneOut['title'] = joined('.//div[1]/div[1]/a/text()').strip()
        oneOut['_id'] = joined('.//div[1]/div[1]/a/@data-housecode').strip()
        try:
            unitPrice = util.String2Number(
                joined('.//div[1]/div[6]/div[2]/span/text()').strip())
            if np.isnan(unitPrice):
                # Prices sit one div further down on some entries.
                # Example page noted by the original author:
                # https://sh.lianjia.com/ershoufang/changning/pg96/
                unitPrice = util.String2Number(
                    joined('.//div[1]/div[7]/div[2]/span/text()').strip())
                totalPricePath = './/div[1]/div[7]/div[1]/span/text()'
            else:
                totalPricePath = './/div[1]/div[6]/div[1]/span/text()'
            oneOut['unitPrice'] = unitPrice
            oneOut['totalPrice'] = util.String2Number(
                joined(totalPricePath).strip())

            oneOut['community'] = joined('.//div[1]/div[2]/div/a/text()')

            # '|'-separated house info: field 1 = house type, field 2 = area.
            houseInfo = joined('.//div[1]/div[2]/div/text()').split('|')
            if len(houseInfo) > 1:
                oneOut['houseType'] = houseInfo[1].strip()
            if len(houseInfo) > 2:
                oneOut['square'] = util.String2Number(houseInfo[2].strip())

            # Absolute path of the area link as noted by the original author:
            # '/html/body/div[4]/div[1]/ul/li[1]/div[1]/div[3]/div/a'
            oneOut['area'] = util.ExtractString(
                one, './/div[1]/div[3]/div/a/text()')

            # Position info is split on ')': the first piece (with the ')'
            # put back) is the level, the second the structure.
            positionInfo = joined('.//div[1]/div[3]/div/text()').split(')')
            if len(positionInfo) > 0:
                oneOut['level'] = positionInfo[0].strip() + ')'
            if len(positionInfo) > 1:
                oneOut['structure'] = positionInfo[1].strip()

            # '/'-separated follow info: attention / follow / release.
            followInfo = joined('.//div[1]/div[4]/text()').split('/')
            if len(followInfo) > 0:
                oneOut['attention'] = followInfo[0].strip()
            if len(followInfo) > 1:
                oneOut['follow'] = followInfo[1].strip()
            if len(followInfo) > 2:
                oneOut['release'] = followInfo[2].strip()

            oneOut['crawlDate'] = util.today()

        except Exception as e:
            print(e)
            logging.warning("parseOne Exception %s" % (str(e)))
        return oneOut
コード例 #14
0
ファイル: ljShanghai.py プロジェクト: reinhardtken/py-code
    def parse(self, response):
        """Crawl callback whose work depends on ``response.meta['step']``.

        As produced by this method, requests carry step 0 for district
        pages, step 1 for sub-district pages (together with their 'url')
        and step 2 for further result pages (together with 'district' and
        'subDistrict').

        On every response, whatever its step, the district and sub-district
        links not yet received are requested.  After that:

        * step 1: read the district and sub-district names and the listing
          count, yield an items.HouseDetailDigest, then request the pages
          returned by self.nextPage.
        * step 2: take district and sub-district from the meta.
        * step 1 or higher: yield each parsed item whose '_id' is not
          empty.

        Args:
            response: crawled page; its URL is added to self.received.

        Yields:
            Request objects, one HouseDetailDigest (step 1 only) and the
            items returned by self.parseOne.
        """
        self.received.add(response.url)

        for link in set(self.parseDistricts(response)) - self.received:
            yield Request(link, meta={'step': 0})

        for link in set(self.parseSubDistricts(response)) - self.received:
            yield Request(link, meta={'step': 1, 'url': link})

        district = np.nan
        subDistrict = np.nan

        meta = response.meta
        # Everything below requires a step; a response without one ends here.
        if 'step' not in meta:
            return

        if meta['step'] == 1:
            found = response.xpath(self.xpath['districtName']).extract()
            if len(found):
                district = found[0]

            found = response.xpath(self.xpath['subDistrictName']).extract()
            if len(found):
                subDistrict = found[0]

            countText = ''.join(
                response.xpath(self.xpath['districtNumber']).extract()).strip()
            number = util.String2Number(countText)

            digest = items.HouseDetailDigest()
            digest['city'] = self.city
            digest['src'] = self.src
            digest['district'] = district
            digest['subDistrict'] = subDistrict
            digest['number'] = number
            # NOTE(review): if either name lookup matched nothing the value
            # is still NaN and this string concatenation would fail;
            # confirm whether that case can happen on real pages.
            digest['_id'] = util.todayString() + '_' + digest['city'] + '_' + digest[
                'district'] + '_' + digest['subDistrict']
            yield digest

            nextPage = self.nextPage(response, self.head, meta['url'])
            for one in set(nextPage) - self.received:
                print('next url: %s %s %s' % (district, subDistrict, one))
                yield Request(one,
                              meta={
                                  'step': 2,
                                  'district': district,
                                  'subDistrict': subDistrict
                              })

        if meta['step'] == 2:
            district = meta['district']
            subDistrict = meta['subDistrict']

        if meta['step'] >= 1:
            for one in response.xpath(self.xpath['lists']):
                oneOut = self.parseOne(one, district, subDistrict)
                # Entries without an id are dropped.
                if len(oneOut['_id']):
                    yield oneOut