def _basic_init(self):
    """
    Download the js data page of a money-market fund and build its price table.

    Sets ``self._page`` (the downloaded response), ``self.name`` and
    ``self.price``. The price table has the columns ``date``, ``netvalue``,
    ``totvalue`` and ``comment``; it is restricted to dates contained in
    ``opendate`` and to dates not later than ``yesterdaydash()``.

    :raises FundTypeError: if the first 800 characters of the page contain
        ``Data_fundSharesPositions``, i.e. the code looks like an ordinary fund.
    """
    self._page = rget(self._url)
    text = self._page.text
    if text[:800].find("Data_fundSharesPositions") >= 0:
        raise FundTypeError("This code seems to be a fund, use fundinfo instead")

    # "[\s\S]*" is used instead of ".*" because "." does not match "\n";
    # with ".*" the match fails (returns None) as soon as the variable is
    # not on the first line of the page.
    raw_income = re.match(
        r"[\s\S]*Data_millionCopiesIncome = ([^;]*);[\s\S]*", text
    ).groups()[0]
    # NOTE(review): eval() runs text downloaded from a remote server. Kept
    # for compatibility; consider a data-only parser if the payload allows.
    income = eval(raw_income)
    self.name = re.match(
        r"[\s\S]*fS_name = \"([^;]*)\";[\s\S]*", text
    ).groups()[0]

    # Timestamps are in milliseconds; they are converted in UTC+8 and then
    # stripped of tzinfo so the resulting datetimes are naive.
    tz_bj = dt.timezone(dt.timedelta(hours=8))
    datel = [
        dt.datetime.fromtimestamp(int(point[0]) / 1e3, tz=tz_bj).replace(tzinfo=None)
        for point in income
    ]
    ratel = [float(point[1]) for point in income]

    # Compound the daily figures (each scaled by 1e-4) into a running value
    # that starts from 1; the starting value itself is not part of the table.
    netvalue = []
    running = 1
    for dailyrate in ratel:
        running = running * (1 + dailyrate * 1e-4)
        netvalue.append(running)

    df = pd.DataFrame(
        data={
            "date": datel,
            "netvalue": netvalue,
            "totvalue": netvalue,
            "comment": [0 for _ in datel],
        }
    )
    df = df[df["date"].isin(opendate)]
    df = df.reset_index(drop=True)
    self.price = df[df["date"] <= yesterdaydash()]
def _basic_init(self):
    """
    Download the js data page of a fund and populate the basic attributes.

    Sets ``self._page``, ``self.rate``, ``self.name`` and ``self.price``.
    The price table has the columns ``date``, ``netvalue`` and ``comment``,
    plus ``totvalue`` when the accumulated-value series has the same length
    as the net-value series. Unless ``self.priceonly`` is true,
    ``self._feepreprocess()`` is called at the end.

    :raises ParserFailure: if the page answers with status 404, or if no row
        of the price table falls on a date contained in ``opendate``.
    :raises FundTypeError: if the first 800 characters of the page contain
        ``Data_millionCopiesIncome``, i.e. the code looks like a money fund.
    """
    self._page = rget(self._url)
    if self._page.status_code == 404:
        raise ParserFailure("Unrecognized fund, please check fund code you input.")
    page_text = self._page.text
    if "Data_millionCopiesIncome" in page_text[:800]:
        raise FundTypeError("This code seems to be a mfund, use mfundinfo instead")

    def captured(pattern):
        # First capture group of ``pattern`` matched against the whole page.
        # The patterns use [\s\S]* because ".*" does not match "\n".
        return re.match(pattern, page_text).groups()[0]

    # NOTE(review): eval() below runs text downloaded from a remote server.
    # No fund with null net values has been found so far; if one exists,
    # other code would most likely break as well.
    net_points = eval(
        captured(r"[\s\S]*Data_netWorthTrend = ([^;]*);[\s\S]*").replace("null", "None")
    )
    # Fund 096001 has null entries in its accumulated-value data.
    total_points = eval(
        captured(r"[\s\S]*Data_ACWorthTrend = ([^;]*);[\s\S]*").replace("null", "None")
    )

    # The time zone must be taken into account when converting timestamps:
    # convert in UTC+8, then drop tzinfo to obtain naive datetimes.
    beijing = dt.timezone(dt.timedelta(hours=8))
    dates = []
    values = []
    comments = []
    for point in net_points:
        dates.append(
            dt.datetime.fromtimestamp(int(point["x"]) / 1e3, tz=beijing).replace(tzinfo=None)
        )
    for point in net_points:
        values.append(float(point["y"]))
    for point in net_points:
        comments.append(_nfloat(point["unitMoney"]))
    columns = {"date": dates, "netvalue": values, "comment": comments}
    # Guard against mismatched counts of accumulated and net values;
    # a fund known to have this problem: 502010.
    if len(net_points) == len(total_points):
        columns["totvalue"] = [pair[1] for pair in total_points]

    raw_rate = captured(r"[\s\S]*fund_Rate=([^;]*);[\s\S]*")
    try:
        purchase_rate = float(eval(raw_rate))
    except ValueError:
        # known cases: ETF
        purchase_rate = 0
        logger.info("warning: this fund has no data for rate")
    fund_name = eval(captured(r"[\s\S]*fS_name = ([^;]*);[\s\S]*"))

    # shengou rate in tiantianjijin, daeshengou rate discount is not considered
    self.rate = purchase_rate
    # the name of the fund
    self.name = fund_name

    table = pd.DataFrame(data=columns)
    table = table[table["date"].isin(opendate)].reset_index(drop=True)
    if table.empty:
        raise ParserFailure("no price table found for this fund %s" % self.code)
    self.price = table[table["date"] <= yesterdaydash()]
    # deal with the redemption fee attrs finally
    if not self.priceonly:
        self._feepreprocess()
def _basic_init(self):
    """
    Download the js data page of a fund and populate the basic attributes.

    Sets ``self._page``, ``self.rate``, ``self.name`` and ``self.price`` and
    finally calls ``self._feepreprocess()``. The price table has the columns
    ``date``, ``netvalue`` and ``comment``, plus ``totvalue`` when the
    accumulated-value series has the same length as the net-value series.

    :raises FundTypeError: if the first 800 characters of the page contain
        ``Data_millionCopiesIncome``, i.e. the code looks like a money fund.
    """
    self._page = _download(self._url)
    text = self._page.text
    if text[:800].find("Data_millionCopiesIncome") >= 0:
        raise FundTypeError("This code seems to be a mfund, use mfundinfo instead")

    # "[\s\S]*" is used instead of ".*" because "." does not match "\n";
    # with ".*" the match fails (returns None) whenever the variable is not
    # on the first line of the page.
    # NOTE(review): eval() below runs text downloaded from a remote server.
    # Kept for compatibility; consider a data-only parser.
    raw_net = re.match(
        r"[\s\S]*Data_netWorthTrend = ([^;]*);[\s\S]*", text
    ).groups()[0]
    # A JS ``null`` is not a Python name; map it to None so eval() does not
    # fail with NameError on it.
    l = eval(raw_net.replace("null", "None"))
    raw_tot = re.match(
        r"[\s\S]*Data_ACWorthTrend = ([^;]*);[\s\S]*", text
    ).groups()[0]
    ltot = eval(raw_tot.replace("null", "None"))

    ## timestamp transform tzinfo must be taken into consideration
    tz_bj = dt.timezone(dt.timedelta(hours=8))
    infodict = {
        "date": [
            dt.datetime.fromtimestamp(int(d["x"]) / 1e3, tz=tz_bj).replace(tzinfo=None)
            for d in l
        ],
        "netvalue": [float(d["y"]) for d in l],
        "comment": [_nfloat(d["unitMoney"]) for d in l],
    }
    # Guard against mismatched counts of accumulated and net values;
    # a fund known to have this problem: 502010.
    if len(l) == len(ltot):
        infodict["totvalue"] = [d[1] for d in ltot]

    try:
        rate = float(
            eval(re.match(r"[\s\S]*fund_Rate=([^;]*);[\s\S]*", text).groups()[0])
        )
    except ValueError:
        # know cases: 510030
        rate = 0
        if _do_print_warning:
            print("warning: this fund has no data for rate")
    name = eval(re.match(r"[\s\S]*fS_name = ([^;]*);[\s\S]*", text).groups()[0])

    self.rate = rate  # shengou rate in tiantianjijin, daeshengou rate discount is not considered
    self.name = name  # the name of the fund
    df = pd.DataFrame(data=infodict)
    df = df[df["date"].isin(opendate)]
    df = df.reset_index(drop=True)
    self.price = df[df["date"] <= yesterdaydash()]
    # deal with the redemption fee attrs finally
    self._feepreprocess()
def _basic_init(self):
    """
    Download the js data page of a money-market fund, parse it into a syntax
    tree and build the price table from it.

    Sets ``self._page``, ``self.name`` and ``self.price``. The price table has
    the columns ``date``, ``netvalue``, ``totvalue`` and ``comment``; it is
    restricted to dates contained in ``opendate`` and to dates not later than
    ``yesterdaydash()``.

    :raises FundTypeError: if the first 800 characters of the page contain
        ``Data_fundSharesPositions``, i.e. the code looks like an ordinary fund.
    """
    self._page = _download(self._url)
    if "Data_fundSharesPositions" in self._page.text[:800]:
        raise FundTypeError("This code seems to be a fund, use fundinfo instead")
    tree = Parser().parse(self._page.text)

    def initializer_of(identifier):
        # Initializer node of the first ``var`` statement declaring
        # ``identifier``; IndexError if the page has no such statement.
        hits = [
            node.children()[0].children()[1]
            for node in nodevisitor.visit(tree)
            if isinstance(node, ast.VarStatement)
            and node.children()[0].children()[0].value == identifier
        ]
        return hits[0]

    income_node = initializer_of("Data_millionCopiesIncome")
    name_node = initializer_of("fS_name")
    self.name = name_node.value.strip('"')

    # Timestamps are in milliseconds; convert in UTC+8, then drop tzinfo.
    beijing = dt.timezone(dt.timedelta(hours=8))
    dates = []
    for entry in income_node.children():
        stamp = int(entry.children()[0].value) / 1e3
        dates.append(dt.datetime.fromtimestamp(stamp, tz=beijing).replace(tzinfo=None))
    rates = [float(entry.children()[1].value) for entry in income_node.children()]

    # Compound the daily figures (each scaled by 1e-4) starting from 1; the
    # starting value itself does not appear in the resulting series.
    compounded = []
    level = 1
    for rate in rates:
        level = level * (1 + rate * 1e-4)
        compounded.append(level)

    table = pd.DataFrame(
        data={
            "date": dates,
            "netvalue": compounded,
            "totvalue": compounded,
            "comment": [0] * len(dates),
        }
    )
    table = table[table["date"].isin(opendate)].reset_index(drop=True)
    self.price = table[table["date"] <= yesterdaydash()]
def __init__(
    self,
    code,
    round_label=0,
    dividend_label=0,
    fetch=False,
    save=False,
    path="",
    form="csv",
    priceonly=False,
):
    """
    Normalise the fund code, derive the data urls and initialise the object
    through the parent class.

    :param code: string fund code; a leading "F" followed only by digits is
        stripped, a leading "M" followed only by digits is rejected.
    :param round_label: when equal to 1, or when ``code`` is in ``droplist``,
        the parent is initialised with ``round_label=1`` (the scheme of
        rounding down on share purchase), otherwise with 0.
    :param dividend_label: forwarded unchanged to the parent initialiser.
    :param fetch: forwarded unchanged to the parent initialiser.
    :param save: forwarded unchanged to the parent initialiser.
    :param path: forwarded unchanged to the parent initialiser.
    :param form: forwarded unchanged to the parent initialiser.
    :param priceonly: stored as ``self.priceonly``.
    :raises FundTypeError: if ``code`` is "M" followed only by digits.
    """
    # NOTE(review): the droplist membership test uses the code exactly as
    # passed in, i.e. before an "F" prefix is stripped -- confirm intended.
    label = 1 if (round_label == 1 or code in droplist) else 0

    if code.startswith(("F", "M")) and code[1:].isdigit():
        if code.startswith("M"):
            raise FundTypeError(
                "This code seems to be a mfund, use ``mfundinfo`` instead"
            )
        code = code[1:]

    # js url api for info of certain fund
    self._url = "".join(("http://fund.eastmoney.com/pingzhongdata/", code, ".js"))
    # html url for trade fees info of certain fund
    self._feeurl = "".join(("http://fund.eastmoney.com/f10/jjfl_", code, ".html"))
    self.priceonly = priceonly

    parent_options = {
        "fetch": fetch,
        "save": save,
        "path": path,
        "form": form,
        "round_label": label,
        "dividend_label": dividend_label,
    }
    super().__init__(code, **parent_options)

    # rows with nonvanishing comment, usually fenhong or zhesuan
    nonzero_comment = self.price["comment"] != 0
    self.special = self.price[nonzero_comment]
    self.specialdate = list(self.special["date"])
    try:
        positive_rows = self.price[self.price["comment"] > 0]
        self.fenhongdate = list(positive_rows["date"])
        negative_rows = self.price[self.price["comment"] < 0]
        self.zhesuandate = list(negative_rows["date"])
    except TypeError:
        # the ordering comparison fails when comments are not numeric
        print("There are still string comments for the fund!")
def _fetch_sql(self, path):
    """
    fetch the information and pricetable from sql, not recommend to use manually,
    just set the fetch label to be true when init the object

    The table ``"xa" + self.code`` is read. Its first row carries a JSON
    object in the ``comment`` column holding ``segment``, ``feeinfo``,
    ``name`` and ``rate``; all following rows form the price table, whose
    ``comment`` column is converted to float.

    Exceptions raised by ``pd.read_sql`` are not handled here and propagate
    to the caller unchanged.

    :param path: engine object from sqlalchemy
    :raises FundTypeError: if the JSON in the first row is not a dict.
    """
    content = pd.read_sql("xa" + self.code, path)
    pricetable = content.iloc[1:]
    commentl = [float(com) for com in pricetable["comment"]]
    # Work on an explicit copy: assigning a column to a frame derived from
    # an iloc slice is a chained assignment in pandas (SettingWithCopyWarning).
    price = pricetable[["date", "netvalue", "totvalue"]].copy()
    price["comment"] = commentl
    self.price = price

    saveinfo = json.loads(content.iloc[0]["comment"])
    if not isinstance(saveinfo, dict):
        raise FundTypeError("This csv doesn't looks like from fundinfo")
    self.segment = saveinfo["segment"]
    self.feeinfo = saveinfo["feeinfo"]
    self.name = saveinfo["name"]
    self.rate = saveinfo["rate"]
def _fetch_csv(self, path):
    """
    fetch the information and pricetable from path+code.csv, not recommend to use manually,
    just set the fetch label to be true when init the object

    The first data row of the file carries a JSON object in the ``date``
    column holding ``segment``, ``feeinfo``, ``name`` and ``rate``; all
    following rows form the price table, whose ``date`` column is parsed
    into datetimes.

    Exceptions raised by ``pd.read_csv`` (e.g. ``FileNotFoundError`` when
    there is no saved copy) are not handled here and propagate unchanged.

    :param path: string of folder path; it is concatenated directly with the
        code, so it has to end with the path separator.
    :raises FundTypeError: if the JSON in the first row is not a dict.
    """
    content = pd.read_csv(path + self.code + ".csv")
    pricetable = content.iloc[1:]
    datel = list(pd.to_datetime(pricetable["date"]))
    # Work on an explicit copy: assigning a column to a frame derived from
    # an iloc slice is a chained assignment in pandas (SettingWithCopyWarning).
    price = pricetable[["netvalue", "totvalue", "comment"]].copy()
    price["date"] = datel
    self.price = price

    saveinfo = json.loads(content.iloc[0]["date"])
    if not isinstance(saveinfo, dict):
        raise FundTypeError("This csv doesn't looks like from fundinfo")
    self.segment = saveinfo["segment"]
    self.feeinfo = saveinfo["feeinfo"]
    self.name = saveinfo["name"]
    self.rate = saveinfo["rate"]
def _basic_init(self):
    """
    Download the js data page of a fund, parse it into a syntax tree and
    populate the basic attributes.

    Sets ``self._page``, ``self.rate``, ``self.name`` and ``self.price`` and
    finally calls ``self._feepreprocess()``. The price table has the columns
    ``date``, ``netvalue`` and ``comment``, plus ``totvalue`` when the
    accumulated-value series has the same length as the net-value series.
    When the page carries no parsable ``fund_Rate``, ``self.rate`` is set to 0.

    :raises FundTypeError: if the first 800 characters of the page contain
        ``Data_millionCopiesIncome``, i.e. the code looks like a money fund.
    """
    import logging  # local import: only needed for the missing-rate notice

    self._page = _download(self._url)
    if self._page.text[:800].find("Data_millionCopiesIncome") >= 0:
        raise FundTypeError("This code seems to be a mfund, use mfundinfo instead")
    parser = Parser()  # parse the js text of API page using slimit module
    tree = parser.parse(self._page.text)

    def initializer_of(identifier):
        # Initializer node of the first ``var`` statement declaring
        # ``identifier``; IndexError if the page has no such statement.
        return [
            node.children()[0].children()[1]
            for node in nodevisitor.visit(tree)
            if isinstance(node, ast.VarStatement)
            and node.children()[0].children()[0].value == identifier
        ][0]

    nodenet = initializer_of("Data_netWorthTrend")
    nodetot = initializer_of("Data_ACWorthTrend")
    net_items = nodenet.children()
    tot_items = nodetot.children()

    ## timestamp transform tzinfo must be taken into consideration
    tz_bj = dt.timezone(dt.timedelta(hours=8))
    infodict = {
        "date": [
            dt.datetime.fromtimestamp(
                int(item.children()[0].right.value) / 1e3, tz=tz_bj
            ).replace(tzinfo=None)
            for item in net_items
        ],
        "netvalue": [float(item.children()[1].right.value) for item in net_items],
        "comment": [_nfloat(item.children()[3].right.value) for item in net_items],
    }
    # Guard against mismatched counts of accumulated and net values;
    # a fund known to have this problem: 502010.
    if len(net_items) == len(tot_items):
        # A literal "null" cannot be converted by float(); store None for it.
        # NOTE(review): assumes the parser exposes a JS null as the string
        # "null" in ``.value`` -- confirm against the slimit version in use.
        infodict["totvalue"] = [
            None
            if item.children()[1].value == "null"
            else float(item.children()[1].value)
            for item in tot_items
        ]

    rate = initializer_of("fund_Rate")
    name = initializer_of("fS_name")
    try:
        # shengou rate in tiantianjijin, daeshengou rate discount is not considered
        self.rate = float(rate.value.strip('"'))
    except ValueError:
        # An empty rate string cannot be converted; fall back to 0 instead
        # of aborting the whole initialisation.
        self.rate = 0
        logging.getLogger(__name__).info("warning: this fund has no data for rate")
    self.name = name.value.strip('"')  # the name of the fund

    df = pd.DataFrame(data=infodict)
    df = df[df["date"].isin(opendate)]
    df = df.reset_index(drop=True)
    self.price = df[df["date"] <= yesterdaydash()]
    # deal with the redemption fee attrs finally
    self._feepreprocess()