def cal(self):
    """Attach per-tag summary columns to the frame from ``load("info")``.

    One row per (brand, model, tag) is grouped by ``tag`` and passed to
    ``self.cal_aver_top``.  The result is reduced to one row per tag holding
    ``top_tag`` and ``aver_model_tag_ratings`` (both must be present in what
    ``cal_aver_top`` returns), left-joined back onto every original row on
    ``tag``, and handed to ``dump(..., "info")``.
    """
    origin = load("info")
    per_model_tag = origin.drop_duplicates(["brand", "model", "tag"])
    summarised = per_model_tag.groupby(["tag"]).apply(self.cal_aver_top)
    wanted = ["tag", "top_tag", "aver_model_tag_ratings"]
    per_tag = summarised.drop_duplicates(["tag"])[wanted]
    merged = pd.merge(origin, per_tag, how="left", on=["tag"])
    dump(merged, "info")
def rank(self):
    """Add a ``model_rank`` column to the frame from ``load("info")``.

    The distinct (brand, model) rows with their ``model_wilson`` value are
    passed to ``self.rank_func`` together with the source column name
    ``"model_wilson"`` and the target name ``"model_rank"``.  The score
    column is then removed so only the keys and whatever ``rank_func`` added
    are left-joined back onto the original rows, and the result goes to
    ``dump(..., "info")``.
    """
    origin = load("info")
    keys = ["brand", "model"]
    scores = origin[keys + ["model_wilson"]].drop_duplicates(keys)
    ranked = self.rank_func(scores, "model_wilson", "model_rank")
    # The score is already present on `origin`; keep only the new column(s).
    ranked = ranked.drop("model_wilson", axis=1)
    dump(pd.merge(origin, ranked, how="left", on=keys), "info")
def evaluate(self):
    """Compute ``model_wilson`` per (brand, model) and join it onto "info".

    For each distinct (brand, model) row of the frame from ``load("info")``,
    ``self.evaluate_score(model_n, model_p, self.z)`` is called once.  The
    scores are left-joined back onto every original row on (brand, model)
    and the result is passed to ``dump(..., "info")``.

    The score column is built in a single pass instead of writing cell by
    cell into the frame while iterating it, so the column exists even when
    the frame is empty and no write lands on a frame derived from another.
    """
    origin = load("info")
    keys = ["brand", "model"]
    models = origin.drop_duplicates(keys)
    wilson = [
        self.evaluate_score(n, p, self.z)
        for n, p in zip(models["model_n"], models["model_p"])
    ]
    # assign() returns a new frame, leaving `models`/`origin` untouched.
    scored = models[keys].assign(model_wilson=wilson)
    dump(pd.merge(origin, scored, how="left", on=keys), "info")
def get(self):
    """Read the review-analysis rows for the current pcid/cid into "info".

    ``Entrance().params`` supplies ``(pcid, cid)``.  A SELECT over the
    ``comment.review_analysis_pcid{pcid}_cid{cid}`` table is built, filtered
    on ``cid``, and executed through ``read_data`` against ``self.src`` with
    ``db="report_dg"``; the returned frame is passed to ``dump(..., "info")``.
    """
    pcid, cid = Entrance().params
    fname = FileBase.info.format(name="comments", pcid=pcid, cid=cid)
    columns = (
        "brand",
        "model",
        "tag",
        "target",
        "grade",
        "frequency",
        "datamonth",
    )
    table = "comment.review_analysis_pcid{pcid}_cid{cid}".format(
        pcid=pcid, cid=cid)
    # NOTE(review): pcid/cid are interpolated straight into the SQL text;
    # confirm Entrance().params can only yield trusted values.
    sql = "SELECT {field} FROM {table} WHERE cid='{cid}';".format(
        field=", ".join(columns), table=table, cid=cid)
    frame = read_data(self.src, fname=fname, sql=sql, db="report_dg")
    dump(frame, "info")
def rank(self):
    """Add a per-tag ``model_tag_rank`` column to the frame from "info".

    The distinct (brand, model, tag) rows with their ``tag_wilson`` value
    are grouped by ``tag``; each group is passed to ``self.rank_func`` with
    the source column name ``"tag_wilson"`` and the target name
    ``"model_tag_rank"``.  The score column is dropped and the remainder is
    left-joined back onto the original rows on (brand, model, tag) before
    being handed to ``dump(..., "info")``.
    """
    origin = load("info")
    keys = ["brand", "model", "tag"]
    scores = origin[keys + ["tag_wilson"]].drop_duplicates(keys)
    ranked = scores.groupby(["tag"]).apply(
        self.rank_func, "tag_wilson", "model_tag_rank")
    # The score is already present on `origin`; keep only the new column(s).
    ranked = ranked.drop("tag_wilson", axis=1)
    dump(pd.merge(origin, ranked, how="left", on=keys), "info")
def cal(self):
    """Derive per-(brand, model) baseline columns from ``tag_u``/``tag_v``.

    One row per (brand, model, tag) is taken from the frame returned by
    ``load("info")``, restricted to the key columns plus ``tag_u`` and
    ``tag_v``, and each (brand, model) group is passed to
    ``self.cal_baseline``.  The two input columns are then removed, one row
    per (brand, model) is kept, and the result is left-joined back onto the
    original rows and passed to ``dump(..., "info")``.
    """
    origin = load("info")
    keys = ["brand", "model"]
    per_tag = origin.drop_duplicates(["brand", "model", "tag"])
    per_tag = per_tag[keys + ["tag_u", "tag_v"]]
    baseline = per_tag.groupby(keys).apply(self.cal_baseline)
    baseline = baseline.drop(["tag_u", "tag_v"], axis=1)
    baseline = baseline.drop_duplicates(keys)
    dump(pd.merge(origin, baseline, how="left", on=keys), "info")
def rank(self):
    """Add a per-target ``model_target_rank`` column to the frame from "info".

    The (brand, model, tag, target, target_score) columns are grouped by
    ``target`` and each group is passed to ``self.rank_func`` with the
    source column name ``"target_score"`` and the target name
    ``"model_target_rank"``.  The score column is dropped and the remainder
    is left-joined back onto the original rows on
    (brand, model, tag, target) before going to ``dump(..., "info")``.
    """
    origin = load("info")
    keys = ["brand", "model", "tag", "target"]
    scores = origin[keys + ["target_score"]]
    ranked = scores.groupby(["target"]).apply(
        self.rank_func, "target_score", "model_target_rank")
    # The score is already present on `origin`; keep only the new column(s).
    ranked = ranked.drop("target_score", axis=1)
    merged = pd.merge(origin, ranked, how="left", on=keys)
    dump(merged, "info")
def adjust(self):
    """Weight ``frequency`` by recency, then drop ``datamonth``.

    ``self.make_record`` is called with the set of ``datamonth`` values found
    in the frame from ``load("info")``; afterwards ``self.record`` is read as
    a mapping from datamonth to a numeric position.  ``self.publish`` is set
    to the smallest of those positions.  Each row's frequency is multiplied
    by ``e ** ((record[datamonth] - publish) / (current - publish))``, i.e.
    by 1 for the earliest month and by ``e`` for a month equal to
    ``self.current``.  The result is passed to ``dump(..., "info")``.

    The new column is built in one pass and assigned as a whole rather than
    written cell by cell while iterating the frame, so an integer
    ``frequency`` column is replaced cleanly by the weighted float values.

    Raises:
        ZeroDivisionError: if the frame has rows and ``self.current`` equals
            ``self.publish``.  NOTE(review): confirm callers guarantee the
            two differ.
    """
    df = load("info")
    self.make_record(datamonths=set(df["datamonth"].values))
    self.publish = min(self.record.values())
    # Loop-invariant denominator of the exponent.
    span = self.current - self.publish
    df["frequency"] = [
        freq * math.e ** ((self.record[month] - self.publish) / span)
        for freq, month in zip(df["frequency"], df["datamonth"])
    ]
    del df["datamonth"]
    dump(df, "info")
def select(self):
    """Archive the full "info" frame and keep only the reporting columns.

    The frame from ``load("info")`` is first passed unchanged to
    ``dump(..., "wholeInfo")``.  A fixed list of key, score, ratings, rank,
    average and top columns is then selected (in the order listed below) and
    passed to ``dump(..., "info")``.
    """
    full = load("info")
    dump(full, "wholeInfo")
    keep = [
        "brand", "model", "tag", "target",
        "model_score", "tag_score", "target_score",
        "model_ratings", "model_tag_ratings", "model_target_ratings",
        "model_rank", "model_tag_rank", "model_target_rank",
        "aver_model_ratings", "aver_model_tag_ratings",
        "aver_model_target_ratings",
        "top_model", "top_tag", "top_target",
    ]
    dump(full[keep], "info")
def get(self):
    """Read review-analysis rows and keep only tags in ``Parameters.tagList``.

    ``Entrance().params`` supplies ``(pcid, cid)``.  A SELECT over the
    ``comment.review_analysis_pcid{pcid}_cid{cid}`` table is built, filtered
    on ``cid``, and executed through ``read_data`` against ``self.src`` with
    ``db="report_dg"``.  Rows are then collected tag by tag, in the order of
    ``Parameters.tagList``, and the concatenation is passed to
    ``dump(..., "info")``.
    """
    pcid, cid = Entrance().params
    fname = FileBase.info.format(name="comments", pcid=pcid, cid=cid)
    columns = (
        "brand",
        "model",
        "tag",
        "target",
        "grade",
        "frequency",
        "datamonth",
    )
    table = "comment.review_analysis_pcid{pcid}_cid{cid}".format(
        pcid=pcid, cid=cid)
    # NOTE(review): pcid/cid are interpolated straight into the SQL text;
    # confirm Entrance().params can only yield trusted values.
    sql = "SELECT {field} FROM {table} WHERE cid='{cid}';".format(
        field=", ".join(columns), table=table, cid=cid)
    fetched = read_data(self.src, fname=fname, sql=sql, db="report_dg")

    # Start from an empty frame so an empty tag list still yields a frame.
    selected = pd.DataFrame()
    for wanted_tag in Parameters.tagList:
        matches = fetched[fetched['tag'] == wanted_tag]
        selected = pd.concat([selected, matches])
    dump(selected, "info")
def cal(self):
    """Split ``frequency`` into ``u``/``v`` by grade and compute the baseline.

    For rows of the frame from ``load("info")`` with ``grade == 1`` the
    frequency goes to ``u`` and ``v`` is 0; for ``grade == -1`` it is the
    other way round.  Rows with any other grade get NaN in both columns.
    ``frequency`` and ``grade`` are then dropped, each
    (brand, model, tag, target) group is passed to ``self.cal_baseline``,
    ``u``/``v`` are removed, one row per key is kept, and the result is
    passed to ``dump(..., "info")``.

    ``u`` and ``v`` are built with boolean masks instead of per-cell writes
    inside ``iterrows``, so both columns always exist — previously an empty
    frame, or one without any grade of 1 or -1, never created them and the
    later column removal raised ``KeyError``.
    """
    df = load("info")

    # Cast once so u/v are floating point however frequency was stored.
    frequency = df["frequency"].astype(float)
    positive = df["grade"] == 1
    negative = df["grade"] == -1
    graded = positive | negative

    # Inner where(): frequency on the matching side, 0 on the other.
    # Outer where(): blank out rows whose grade is neither 1 nor -1.
    df["u"] = frequency.where(positive, 0.0).where(graded)
    df["v"] = frequency.where(negative, 0.0).where(graded)

    keys = ["brand", "model", "tag", "target"]
    df = df.drop(["frequency", "grade"], axis=1)
    df = df.groupby(keys).apply(self.cal_baseline)
    df = df.drop(["u", "v"], axis=1)
    df = df.drop_duplicates(keys)
    dump(df, "info")
def evaluate(self):
    """Add a ``target_wilson`` column to the frame from ``load("info")``.

    ``self.evaluate_score(target_n, target_p, self.z)`` is called once per
    row and the results are stored as ``target_wilson``; the frame is then
    passed to ``dump(..., "info")``.

    The column is assigned in one go rather than cell by cell while the
    frame is being iterated, so it is present even for an empty frame and
    does not depend on the index labels being unique.
    """
    df = load("info")
    df["target_wilson"] = [
        self.evaluate_score(n, p, self.z)
        for n, p in zip(df["target_n"], df["target_p"])
    ]
    dump(df, "info")
def adjust(self):
    """Drop the ``datamonth`` column from "info" without reweighting.

    The frame from ``load("info")`` is passed to ``dump(..., "info")`` with
    ``datamonth`` removed; ``frequency`` is left as loaded.
    """
    frame = load("info")
    frame = frame.drop("datamonth", axis=1)
    dump(frame, "info")
def trans(self):
    """Collapse the "info" frame to one row per (brand, model).

    Each (brand, model) group of the frame from ``load("info")`` is passed
    to ``self.trans_func``.  The ``tag`` column is then removed, duplicate
    (brand, model) rows are dropped, and the result is passed to
    ``dump(..., "info")``.
    """
    keys = ["brand", "model"]
    frame = load("info")
    transformed = frame.groupby(keys).apply(self.trans_func)
    transformed = transformed.drop("tag", axis=1)
    dump(transformed.drop_duplicates(keys), "info")
def cal(self):
    """Apply ``self.cal_aver_top`` to every (tag, target) group of "info".

    The frame from ``load("info")`` is grouped by ``tag`` and ``target``,
    each group is passed to ``self.cal_aver_top``, and the combined result
    is passed to ``dump(..., "info")``.
    """
    grouped = load("info").groupby(["tag", "target"])
    dump(grouped.apply(self.cal_aver_top), "info")
def cal(self):
    """Store the overall model top/average values on every "info" row.

    ``self.cal_aver_top`` is called once with the distinct (brand, model)
    rows of the frame from ``load("info")`` and must return a
    ``(top, aver)`` pair.  The pair is written to the ``top_model`` and
    ``aver_model_ratings`` columns of the full frame, which is then passed
    to ``dump(..., "info")``.
    """
    origin = load("info")
    models = origin.drop_duplicates(["brand", "model"])
    top, aver = self.cal_aver_top(models)
    origin["top_model"] = top
    origin["aver_model_ratings"] = aver
    dump(origin, "info")
def cal(self):
    """Apply ``self.cal_baseline`` to every (brand, model, tag) group.

    The frame from ``load("info")`` is grouped by ``brand``, ``model`` and
    ``tag``, each group is passed to ``self.cal_baseline``, and the combined
    result is passed to ``dump(..., "info")``.
    """
    grouped = load("info").groupby(["brand", "model", "tag"])
    dump(grouped.apply(self.cal_baseline), "info")