Esempio n. 1
0
 def fetch_raw(self, sql):
     """Run ``sql`` and return every result row as one tab-joined string.

     A new cursor is opened on ``self.conn`` and stored on ``self.cursor``;
     it is left open after the rows have been read.

     :param sql: statement text; passed through ``smart_encode`` before
         being handed to the cursor.
     :return: list with one string per row, cells separated by ``"\\t"``.
         ``None`` cells become empty fields; every other cell is rendered
         with ``smart_decode(cell, cast=True)``.
     """
     self.cursor = self.conn.cursor()
     self.cursor.execute(smart_encode(sql))
     return [
         "\t".join(
             # Compare against None explicitly: a plain truthiness test would
             # also blank out legitimate falsy values such as 0 or False.
             "" if cell is None else smart_decode(cell, cast=True)
             for cell in row
         )
         for row in self.cursor
     ]
Esempio n. 2
0
 def execute(self, sql):
     """Execute each ``;``-separated statement in ``sql``, then close the connection.

     Blank fragments (for example after a trailing ``;``) are skipped. The
     connection on ``self.conn`` is closed when the method finishes, whether
     the statements succeeded or raised.

     :param sql: one or more statements separated by ``;``; each is passed
         through ``smart_encode`` before execution.
     :return: ``True`` once every statement has been executed.
     """
     try:
         # The ``with`` block closes the cursor, so no explicit close is needed.
         with self.conn.cursor() as cursor:
             for fragment in sql.split(";"):
                 statement = fragment.strip()
                 if statement:
                     cursor.execute(smart_encode(statement))
     finally:
         # Release the connection even when a statement fails; previously it
         # was only closed on the success path.
         self.conn.close()
     return True
Esempio n. 3
0
 def query(self, sql):
     """Run ``sql`` and return all rows as dicts, then close the connection.

     Column names are taken from the first item of each entry in
     ``cursor.description`` and passed through ``smart_decode``. The
     connection on ``self.conn`` is closed when the method finishes, whether
     the query succeeded or raised.

     :param sql: statement text; passed through ``smart_encode`` before
         execution.
     :return: the result of ``smart_decode`` applied to a list holding one
         ``{column: value}`` dict per row.
     """
     try:
         # The ``with`` block closes the cursor, so no explicit close is needed.
         with self.conn.cursor() as cursor:
             cursor.execute(smart_encode(sql))
             columns = smart_decode([entry[0] for entry in cursor.description])
             # Rows are fully materialised here, before the cursor is closed.
             return smart_decode([dict(zip(columns, record)) for record in cursor])
     finally:
         # Release the connection even when the query fails; previously it
         # was only closed on the success path.
         self.conn.close()
Esempio n. 4
0
 def total(self, sql):
     """Run ``sql`` and return the first column's value from the first row.

     A new cursor is opened on ``self.conn`` and stored on ``self.cursor``.
     Column names come from the ``"columnName"`` entry of each item returned
     by ``cursor.getSchema()``; names and cells go through ``smart_decode``.

     :param sql: statement text; passed through ``smart_encode`` first.
     :return: value stored under the first column name in the first row, or
         ``0`` when the statement yields no rows.
     """
     cursor = self.conn.cursor()
     self.cursor = cursor
     cursor.execute(smart_encode(sql))

     columns = [smart_decode(field["columnName"]) for field in cursor.getSchema()]

     # Every row is read and converted, even though only the first is used.
     records = []
     for raw_row in cursor:
         decoded_cells = [smart_decode(cell) for cell in raw_row]
         records.append(dict(zip(columns, decoded_cells)))

     if not records:
         return 0
     return records[0][columns[0]]
Esempio n. 5
0
def get_distance(dummy_head, word_vector, size, data):
    """Replace columns 3-6 of ``data`` with one comma-joined distance column.

    Columns 3 and 4 hold comma-separated dummy values and columns 5 and 6
    hold comma-separated attributes for the customer and target sides.
    Each side is turned into a vector by ``build_base_feature``; every
    callable yielded by ``gen_distance_method()`` is then applied to each
    pair of same-index vector elements, and the results are cleaned with
    ``numpy.nan_to_num``.

    :param dummy_head: forwarded unchanged to ``build_base_feature``.
    :param word_vector: forwarded unchanged to ``build_base_feature``.
    :param size: forwarded unchanged to ``build_base_feature``.
    :param data: list of column values for one record.
    :return: ``data[:3]``, then the joined feature string, then ``data[7:]``.
    """
    customer_dummy = numpy.array(data[3].split(","))
    target_dummy = numpy.array(data[4].split(","))
    customer_attr = data[5].split(",")
    target_attr = data[6].split(",")

    customer_vector = build_base_feature(
        dummy_head, word_vector, customer_dummy, customer_attr, size)
    target_vector = build_base_feature(
        dummy_head, word_vector, target_dummy, target_attr, size)

    # Outer loop walks vector positions, inner loop walks distance functions,
    # so values for one position are adjacent in the output.
    distances = []
    for position in range(len(customer_vector)):
        for measure in gen_distance_method():
            raw_distance = measure(customer_vector[position],
                                   target_vector[position])
            distances.append(numpy.nan_to_num(raw_distance))

    joined = ",".join(smart_encode(distances, cast=True))
    return data[:3] + [joined] + data[7:]
Esempio n. 6
0
    def query(self, sql, meta=False, to_dict=True):
        """
        Execute ``sql`` on a new cursor (kept on ``self.cursor``) and return all rows.

        :param sql: statement text; passed through ``smart_encode`` first
        :param meta: when True, the column headers are returned as well
        :param to_dict: True returns each row as a dict keyed by column name,
            False returns the rows as a list passed through ``smart_decode``
        :return: ``rows``, or the tuple ``(rows, columns)`` when ``meta`` is true
        """
        self.cursor = self.conn.cursor()
        self.cursor.execute(smart_encode(sql))
        columns = [smart_decode(field["columnName"]) for field in self.cursor.getSchema()]

        if not to_dict:
            rows = smart_decode(list(self.cursor))
        else:
            rows = []
            for record in self.cursor:
                decoded_cells = [smart_decode(cell) for cell in record]
                rows.append(dict(zip(columns, decoded_cells)))

        return (rows, columns) if meta else rows
Esempio n. 7
0
def make_similarity_feature(tag_dict, row):
    """Build one similarity score per attribute in ``tag_dict`` for a pair of items.

    ``row[0]`` and ``row[1]`` are parsed with ``attributes_to_dict``. Exactly
    one value is appended to the feature list for every entry of
    ``tag_dict``, so the feature length always equals ``len(tag_dict)``:

    * For the ``"材质成分"`` attribute (presumably "material composition" --
      confirm with the data owner), both values are parsed with
      ``material_string_to_dict`` and the score is the sum, over materials
      present on both sides, of the smaller of the two amounts, divided
      by 100.
    * For any other attribute, both sides are restricted to the values listed
      in ``tag_dict`` and the score is ``|intersection| / |union|`` of the
      two restricted sets.

    The score falls back to ``0.`` when an attribute is missing or unusable
    on either side, or when the union is empty.

    :param tag_dict: mapping of attribute name to its list of known values.
    :param row: list whose first two items are the attribute strings.
    :return: ``row[2:4]``, then the comma-joined feature string, then ``row[4:]``.
    """
    attr1 = attributes_to_dict(row[0])
    attr2 = attributes_to_dict(row[1])
    feature = []

    for attr_name, attr_value_list in iteritems(tag_dict):
        similarity = 0.
        if attr_name == "材质成分":
            md1 = material_string_to_dict(attr1.get(attr_name))
            md2 = material_string_to_dict(attr2.get(attr_name))
            shared_materials = set(md1) & set(md2)
            if shared_materials:
                material_similar_score = similarity
                for material in shared_materials:
                    material_similar_score += min(md1[material], md2[material])
                # Append once, after the sum is complete. Appending inside the
                # loop emitted one partial value per shared material and made
                # the feature vector length depend on the data.
                feature.append(round(material_similar_score / 100, 4))
            else:
                feature.append(similarity)
            continue

        try:
            known_values = set(attr_value_list)
            values1 = set(attr1[attr_name])
            values2 = set(attr2[attr_name])
        except Exception:
            # Best effort: a missing or unusable attribute scores 0. Unlike a
            # bare ``except``, this lets KeyboardInterrupt/SystemExit through.
            feature.append(similarity)
            continue

        set1 = known_values & values1
        set2 = known_values & values2

        try:
            # float() keeps this a true division under Python 2 as well;
            # dividing by 0.0 still raises ZeroDivisionError.
            feature.append(round(len(set1 & set2) / float(len(set1 | set2)), 4))
        except ZeroDivisionError:
            feature.append(similarity)

    return row[2:4] + [",".join(smart_encode(feature, cast=True))] + row[4:]
Esempio n. 8
0
    ("促销",
     "预售|反季清仓|促销活动|大促销|试用装|赶紧行动|选购|赠品|热销|特价|限时|批发|大型展卖|促销商品|折上折|特价场|还在等什么|活动品牌|全场品牌|需要的抓紧|优惠劵大放送|体验价|优惠多多|购物狂欢节|限时优惠|特价处理|清仓|断码|原价|降价|打折|限时|低价|特卖|满购|聚划算|购物满|促销|优惠价|男女同款|淘宝只卖|甩卖价|天猫商城价|现价只有|最终售价|感恩节折扣|即可获得优惠|新品预售|双十一狂欢价|甩卖|开抢|快来抢|更多优惠活动等着你|全清价|每件立减|0元换购|精选上市|大减价|大处理|清场甩|优惠等你来|喜迎双11|限时\w{0,1}折|大销价|购实惠|购满+送|送+价值|开学巨献|开业价|全场\w{0,3}折|满\w{0,3}减|件W{0,3}折|全场\w{0,2}减|买\w{0,2}减|折处理|折优惠|元优惠券|包顺风|送.*元红包|分享有礼送|一大波.*来袭|买\D{0,5}送|低至\w{0,5}折|买1送1|买一送一|风衣特价|买任意款|加100元换购|加1元送一|码特价|热销价|折扣价|疯狂价|优惠价|男女同款|淘宝只卖|甩卖价|天猫商城价|现价只有|最终售价|感恩节折扣|即可获得优惠|新品预售|双十一狂欢价|甩卖|开抢|快来抢|更多优惠活动等着你|全清价|每件立减|0元换购|精选上市|大减价|大处理|清场甩|优惠等你来|喜迎双11|特价折扣|抢购|商场特卖|国庆特惠|购物节|活动时间|(春|夏|秋|冬)装折扣"
     ),
    ("投票活动",
     "\【|\】|\〖|\〗|\《|\》|我是雷锋|帮忙投票|好礼等你来拿|活动推荐给大家|活动详见|有机会赢|有奖竞猜|详情(请)*点击|活动热线|现在就来参加|只要分享|就有机会获得|微博抽奖|获得好礼|即可赢得|有机会获得|获奖名单|就有机会中奖|赢取|即可获赠|礼品等着您|圆满落幕|有机会赢得|报名活动|幸运奖品|诚邀您参与|有奖活动|的大力支持|快来报名吧|火热报名中|狂欢季|活动地点|活动内容|快来领取|分享你喜欢的|一起来分享吧|免费领取|活动如下:|活动请关注|关注最新活动|免费试用|线上专供|报名网址|本次活动|活动开始|红包大派送|众多好礼|拿大奖|免费大派发|踊跃投票|欢迎\D{0,1}报名参加|大力支持|活动二|小编注意到|立即申请|即可参与抽奖|敬请期待|有机会赢|有奖竞猜|详情(请)*点击|活动热线|现在就来参加|只要分享|就有机会获得|微博抽奖|获得好礼|即可赢得|有机会获得|获奖名单|就有机会中奖|赢取|即可获赠|礼品等着您|圆满落幕|有机会赢得|报名活动|幸运奖品|活动最后一天|赢取大奖|活动将送|店庆|有机会获得|活动内容详见|等你来挑战|即日起,"
     ),
    ("科普",
     "(①|②|③|1\.|2\.|1\、|2\、|图1|图2|1\)| 2\)|一\、|二\、|三\、)|情感好文|今天就介绍大家|今天就介绍给大家|最新发现|研究指出|数据证明|研究发现|注意事项|小贴士|本款|分享给大家|请关注@|小编给大家推荐"
     ), ("新闻", "据媒体报道|开幕首日|隆重举行|活动现场|启动仪式|商业活动中|娱乐|日发售"),
    ("灌水", "顶上去|66666|牛牛牛|顶一个"),
    ("分享",
     "转走|转载|转发|成功分享到|大家帮忙多转|豪礼|一张图片测试|性格测试题|很火的心理测试|您敢挑战吗|异性眼中的你|招聘测试题|变态测试题|请朋友们留意|原文地址|看客推荐|让你在人群中|别怪我不告诉你|转发此条微博|详情点击|分享一款|供大家参考|给大家分享我|小说|转发此微博|分享赢|最变态招聘测试题"
     ), ("其他", "此用户暂时被停用|宜忌")
]

# Stream records from stdin: strip line breaks and "\N" markers, split on tabs,
# denoise the first column, then print the reversed record followed by the
# denoised first column on its own line. Any failure is reported as a
# single-line traceback followed by an "ERROR" column instead of aborting.
for line in sys.stdin:
    try:
        text = smart_decode(line)
        for marker in ("\n", "\r", "\\N"):
            text = text.replace(marker, "")
        line = text.split("\t")
        # Records with fewer than two columns are dropped silently.
        if len(line) < 2:
            continue
        line[0] = mother_baby_denoise(line[0], l)
        print(smart_encode("\t".join(line[::-1])))
        print(smart_encode(line[0]))
    except Exception:
        # Flatten the traceback so it stays a single tab-separated record.
        flat_trace = traceback.format_exc().replace("\t", " ").replace("\n", " ")
        print(smart_encode("\t".join([flat_trace, "ERROR"])))
Esempio n. 9
0
def gen_experiment_logs_head():
    """Overwrite ``experiment_head.csv`` with one comma-joined header line.

    The line is built from the items returned by ``gen_print_var()`` and the
    file location comes from ``get_experiment_logs_path``.
    """
    header_line = ",".join(gen_print_var()) + "\n"
    head_path = get_experiment_logs_path("experiment_head.csv")
    with open(head_path, mode="w") as handle:
        handle.write(smart_encode(header_line))
Esempio n. 10
0
def logging_process(locals_var):
    """Append one comma-joined record to ``experiment_logs.csv``.

    The values to log are selected by ``get_print_var`` from ``locals_var``
    using the names returned by ``gen_print_var()``; each value is converted
    with ``str`` before joining.

    :param locals_var: mapping of variable names to values, forwarded to
        ``get_print_var``.
    """
    selected = get_print_var(locals_var, gen_print_var())
    cells = []
    for _, value in iteritems(selected):
        cells.append(str(value))
    record = ",".join(cells) + "\n"

    log_path = get_experiment_logs_path("experiment_logs.csv")
    with open(log_path, mode="a") as handle:
        handle.write(smart_encode(record))
Esempio n. 11
0
    ("科普",
     "(①|②|③|1\.|2\.|1\、|2\、|图1|图2|1\)| 2\)|一\、|二\、|三\、)|情感好文|今天就介绍大家|今天就介绍给大家|最新发现|研究指出|数据证明|研究发现|注意事项|小贴士|本款|分享给大家|请关注@|小编给大家推荐"
     ), ("新闻", "据媒体报道|开幕首日|隆重举行|活动现场|启动仪式|商业活动中|娱乐|日发售"),
    ("灌水", "顶上去|66666|牛牛牛|顶一个"),
    ("分享",
     "转走|转载|转发|成功分享到|大家帮忙多转|豪礼|一张图片测试|性格测试题|很火的心理测试|您敢挑战吗|异性眼中的你|招聘测试题|变态测试题|请朋友们留意|原文地址|看客推荐|让你在人群中|别怪我不告诉你|转发此条微博|详情点击|分享一款|供大家参考|给大家分享我|小说|转发此微博|分享赢|最变态招聘测试题"
     ), ("其他", "此用户暂时被停用|宜忌")
]

# Column order of every tab-separated input record. Kept as data rather than
# as 27 unpacked module-level variables, which shadowed the builtin ``id``.
FIELD_NAMES = (
    "id", "channel", "subject", "post_id", "title", "tags", "reply_count",
    "view_count", "collection_count", "detail_url", "content",
    "is_best_answer", "like_count", "user_id", "user_name", "user_type",
    "is_host", "replied_user_id", "replied_user_name", "created_at",
    "device", "updated_at", "baby_agethen", "baby_days", "floorid", "noise",
    "platform_id",
)
CONTENT_INDEX = FIELD_NAMES.index("content")
NOISE_INDEX = FIELD_NAMES.index("noise")

# Stream records from stdin, recompute the "noise" column from the "content"
# column, and print the record with every other column unchanged. A record
# that fails (wrong column count, denoise error) is replaced in the output by
# its traceback flattened onto a single line.
for line in sys.stdin:
    fields = smart_decode(line).replace("\n", "").replace("\r", "").replace(
        "\\N", "").split("\t")
    try:
        if len(fields) != len(FIELD_NAMES):
            raise ValueError("expected %d columns, got %d" %
                             (len(FIELD_NAMES), len(fields)))
        fields[NOISE_INDEX] = mother_baby_denoise(fields[CONTENT_INDEX], l)
        print(smart_encode("\t".join(fields)))
    except Exception:
        print(
            smart_encode(traceback.format_exc().replace("\t", " ").replace(
                "\n", " ")))
Esempio n. 12
0
 def execute(self, sql):
     """Execute each non-blank ``;``-separated statement in ``sql``.

     A new cursor is opened on ``self.conn`` and stored on ``self.cursor``;
     every statement is stripped of surrounding whitespace and passed through
     ``smart_encode`` before execution.

     :param sql: one or more statements separated by ``;``.
     """
     self.cursor = self.conn.cursor()
     statements = (fragment.strip() for fragment in sql.split(";"))
     for statement in statements:
         if not statement:
             # Nothing between two separators (or after a trailing ";").
             continue
         self.cursor.execute(smart_encode(statement))
Esempio n. 13
0
 def get(self, sql):
     """Return the first row that ``self.query`` yields for ``sql``.

     :param sql: statement text; passed through ``smart_encode`` before being
         handed to ``self.query``.
     :return: the first row, or ``None`` when the result is empty.
     """
     result = self.query(smart_encode(sql))
     if len(result) == 0:
         return None
     return result[0]