Beispiel #1
0
 def query(self, sql):
     """Run ``sql`` and return every result row as a dict keyed by column name.

     The cursor and the connection are closed when the call finishes, both
     on success and when executing or fetching raises.

     :param sql: statement text; passed through ``smart_encode`` before execution
     :return: ``smart_decode`` applied to the list of ``{column: value}`` dicts
     """
     with self.conn.cursor() as cursor:
         try:
             cursor.execute(smart_encode(sql))
             # The first item of each description entry is used as the column name.
             columns = smart_decode([field[0] for field in cursor.description])
             rows = smart_decode([dict(zip(columns, record)) for record in cursor])
         finally:
             # Release both resources even on failure; before this they were
             # only closed on the success path.
             cursor.close()
             self.conn.close()
         return rows
Beispiel #2
0
 def total(self, sql):
     """Run ``sql`` and return the first column's value from the first row.

     Column names come from ``cursor.getSchema()``; every cell is passed
     through ``smart_decode``.

     :param sql: statement text; passed through ``smart_encode`` before execution
     :return: the value stored under the first column name of the first row,
         or ``0`` when the statement yields no rows
     """
     self.cursor = self.conn.cursor()
     self.cursor.execute(smart_encode(sql))

     columns = []
     for field in self.cursor.getSchema():
         columns.append(smart_decode(field["columnName"]))

     rows = []
     for record in self.cursor:
         decoded = [smart_decode(value) for value in record]
         rows.append(dict(zip(columns, decoded)))

     if not rows:
         return 0
     return rows[0][columns[0]]
Beispiel #3
0
 def fetch_raw(self, sql):
     """Run ``sql`` and return each result row as one tab-joined string.

     Truthy cells are converted with ``smart_decode(cell, cast=True)``;
     falsy cells are emitted as an empty string.

     :param sql: statement text; passed through ``smart_encode`` before execution
     :return: list with one tab-separated string per row
     """
     self.cursor = self.conn.cursor()
     self.cursor.execute(smart_encode(sql))
     return [
         "\t".join(smart_decode(cell, cast=True) if cell else "" for cell in record)
         for record in self.cursor
     ]
Beispiel #4
0
def cast_hound():
    """Predict on the first ten pickled "train_data" entries and print line lengths.

    Each entry's first four fields are concatenated with the matching
    prediction row (converted to ``unicode``); the length of the tab-joined
    result is printed for every entry.
    """
    hound = Hound()
    data = smart_decode(pickle_load("train_data"), cast=True)[:10]
    hound.x_predict, hound.y_predict = split_x_y(data)
    hound.load_model()
    hound.predict()
    hound.print_info()

    leading_fields = [entry[:4] for entry in data]
    predicted = hound.prediction.astype(unicode).tolist()
    data = list(map(lambda head, tail: head + tail, leading_fields, predicted))
    for entry in data:
        print(len("\t".join(entry)))
Beispiel #5
0
    def query(self, sql, meta=False, to_dict=True):
        """
        Execute ``sql`` and return the fetched rows.

        :param sql: statement text; passed through ``smart_encode`` before execution
        :param meta: when True, the header (column name) information is returned as well
        :param to_dict: True returns dict-typed rows, False returns list-typed rows
        :return: ``rows``, or the tuple ``(rows, columns)`` when ``meta`` is True
        """
        self.cursor = self.conn.cursor()
        self.cursor.execute(smart_encode(sql))

        columns = []
        for field in self.cursor.getSchema():
            columns.append(smart_decode(field["columnName"]))

        if not to_dict:
            rows = smart_decode([record for record in self.cursor])
        else:
            rows = []
            for record in self.cursor:
                decoded = [smart_decode(value) for value in record]
                rows.append(dict(zip(columns, decoded)))

        return (rows, columns) if meta else rows
Beispiel #6
0
 def query(self, sql, dict_cursor=True, fetchone=False, cast=True):
     """Execute ``sql`` on this object's connection and return the fetched rows.

     :param sql: statement passed unchanged to ``cursor.execute``
     :param dict_cursor: True opens a ``MySQLdb.cursors.DictCursor``,
         False opens the connection's default cursor
     :param fetchone: True returns a one-element list holding the result of
         ``fetchone()``; False returns ``list(fetchall())``
     :param cast: True passes the result through ``smart_decode`` first
     :return: list of fetched rows
     """
     if dict_cursor:
         self.cursor = self.conn.cursor(MySQLdb.cursors.DictCursor)
     else:
         # This branch used to call a bare ``conn.cursor()``, which is not
         # a name defined in this method and raised NameError.
         self.cursor = self.conn.cursor()
     self.cursor.execute(sql)

     if fetchone:
         ret = [self.cursor.fetchone()]
     else:
         ret = list(self.cursor.fetchall())
     return smart_decode(ret) if cast else ret
Beispiel #7
0
    def query(self, operation, parameters=None, fetchone=False):
        """Execute ``operation`` and return the fetched rows as decoded dicts.

        A transaction is begun first when none is active. The cursor is
        closed and the connection handed back to the pool afterwards, both
        on success and on failure.

        :param operation: statement passed to ``cursor.execute``
        :param parameters: optional bind parameters; only forwarded when truthy
        :param fetchone: True fetches a single row, False fetches all rows
        :return: list of ``smart_decode(dict(row))``; empty when ``fetchone``
            found no row
        :raises Exception: whatever ``execute``/``fetch*`` raised, after cleanup
        """
        if not self.in_transaction:
            self.begin()
        try:
            if parameters:
                self.cursor.execute(operation, parameters)
            else:
                self.cursor.execute(operation)
            rows = [self.cursor.fetchone()] if fetchone else self.cursor.fetchall()
        except Exception:
            # Do not leak the cursor or the pooled connection on failure; the
            # connection is discarded instead of being reused.
            self.cursor.close()
            self.pool.putconn(self.conn, close=True)
            raise
        self.cursor.close()
        self.pool.putconn(self.conn)
        # fetchone() yields None when there is no row; skip it instead of
        # crashing in dict(None).
        return [smart_decode(dict(r)) for r in rows if r is not None]
Beispiel #8
0
# Python 2 idiom: re-import sys so that setdefaultencoding can be called,
# then switch the interpreter-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")

# Phrases handed to DataDenoiser as its noise keyword list (see the
# dd.noise_keywords_list assignment in the driver below).
keywords = [
    "此用户没有填写评论",
]


def swap(lst):
    """Return a new two-item list holding ``lst[1]`` followed by ``lst[0]``."""
    second, first = lst[1], lst[0]
    return [second, first]


# Driver: read tab-separated records from stdin, swap the first two fields of
# each record and run them through DataDenoiser with keyword filtering on.
try:
    raw_lines = list(sys.stdin)
    data = [
        swap(
            smart_decode(raw)
            .replace("\n", "")
            .replace("\r", "")
            .replace("\\N", "")
            .split("\t")
        )
        for raw in raw_lines
    ]
    dd = DataDenoiser(data=data, content_index=1, head=["id", "content"])
    dd.use_keywords = True
    dd.noise_keywords_list = keywords
    dd.udf_support = True
    dd.run()
except Exception:
    # Report the failure as one tab-separated record: tabs and newlines in the
    # traceback are replaced by spaces.
    flat_trace = traceback.format_exc().replace("\t", " ").replace("\n", " ")
    print("\t".join(["ERROR", flat_trace]))
Beispiel #9
0
import sys

# Python 2 idiom: re-import sys so that setdefaultencoding can be called,
# then switch the interpreter-wide default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")


# Phrases handed to DataDenoiser as its noise keyword list (see the
# dd.noise_keywords_list assignment in the driver below).
keywords = [
    '通知分享',
    '通知通知',
    '通知出停诊',
    '停诊通知',
    '填表',
    '预约转诊服务',
]


def swap(lst):
    """Build a list of the first two items of ``lst`` in exchanged order."""
    item_at_one = lst[1]
    item_at_zero = lst[0]
    return [item_at_one, item_at_zero]


# Driver: clean every stdin record, swap its first two tab-separated fields
# and feed the result to DataDenoiser with keyword filtering enabled.
try:
    stdin_lines = list(sys.stdin)
    data = []
    for raw in stdin_lines:
        cleaned = smart_decode(raw).replace("\n", "").replace("\r", "").replace("\\N", "")
        data.append(swap(cleaned.split("\t")))
    dd = DataDenoiser(data=data, content_index=1, head=["id", "content"])
    dd.use_keywords = True
    dd.noise_keywords_list = keywords
    dd.udf_support = True
    dd.run()
except Exception:
    # Emit the traceback on a single line, with tabs/newlines turned into spaces.
    flat_trace = traceback.format_exc().replace("\t", " ").replace("\n", " ")
    print("\t".join(["ERROR", flat_trace]))
Beispiel #10
0
from mplib.pricing.helper import split_id_feature, split_x_y
from mplib.pricing import SKAssess
from mplib.common import smart_decode
from mplib.IO import Hive
from mplib import *
import traceback
import sys

if __name__ == "__main__":
    # Python 2 idiom: re-import sys so setdefaultencoding can be called, then
    # make UTF-8 the interpreter-wide default string encoding.
    reload(sys)
    sys.setdefaultencoding("utf8")

    try:
        a = SKAssess()
        # argv[1] is the category id, argv[2] the interval.
        a.category_id = sys.argv[1]
        a.interval = smart_decode(sys.argv[2])
        # NOTE(review): the category id from the command line is formatted
        # straight into the table name -- confirm it is always trusted input.
        sql = "SELECT itemid AS itemid, data AS data FROM elengjing_price.tmp_{0} LIMIT 10".format(
            a.category_id)
        data = Hive("idc").query(sql)
        # Flatten each result row into an "itemid<TAB>data" string.
        data = [
            "\t".join([str(line.get("itemid")),
                       str(line.get("data"))]) for line in data
        ]
        # Decode each string, strip line breaks and split it back on tabs.
        data = list(
            map(
                lambda x: smart_decode(x).replace("\n", "").replace("\r", "").
                split("\t"), data))
        items, data = split_id_feature(data)
        a.x_predict, a.y_predict = split_x_y(data)
        # Print the shapes of the prediction targets and inputs.
        print(a.y_predict.shape)
        print(a.x_predict.shape)
Beispiel #11
0
    ("促销",
     "预售|反季清仓|促销活动|大促销|试用装|赶紧行动|选购|赠品|热销|特价|限时|批发|大型展卖|促销商品|折上折|特价场|还在等什么|活动品牌|全场品牌|需要的抓紧|优惠劵大放送|体验价|优惠多多|购物狂欢节|限时优惠|特价处理|清仓|断码|原价|降价|打折|限时|低价|特卖|满购|聚划算|购物满|促销|优惠价|男女同款|淘宝只卖|甩卖价|天猫商城价|现价只有|最终售价|感恩节折扣|即可获得优惠|新品预售|双十一狂欢价|甩卖|开抢|快来抢|更多优惠活动等着你|全清价|每件立减|0元换购|精选上市|大减价|大处理|清场甩|优惠等你来|喜迎双11|限时\w{0,1}折|大销价|购实惠|购满+送|送+价值|开学巨献|开业价|全场\w{0,3}折|满\w{0,3}减|件W{0,3}折|全场\w{0,2}减|买\w{0,2}减|折处理|折优惠|元优惠券|包顺风|送.*元红包|分享有礼送|一大波.*来袭|买\D{0,5}送|低至\w{0,5}折|买1送1|买一送一|风衣特价|买任意款|加100元换购|加1元送一|码特价|热销价|折扣价|疯狂价|优惠价|男女同款|淘宝只卖|甩卖价|天猫商城价|现价只有|最终售价|感恩节折扣|即可获得优惠|新品预售|双十一狂欢价|甩卖|开抢|快来抢|更多优惠活动等着你|全清价|每件立减|0元换购|精选上市|大减价|大处理|清场甩|优惠等你来|喜迎双11|特价折扣|抢购|商场特卖|国庆特惠|购物节|活动时间|(春|夏|秋|冬)装折扣"
     ),
    ("投票活动",
     "\【|\】|\〖|\〗|\《|\》|我是雷锋|帮忙投票|好礼等你来拿|活动推荐给大家|活动详见|有机会赢|有奖竞猜|详情(请)*点击|活动热线|现在就来参加|只要分享|就有机会获得|微博抽奖|获得好礼|即可赢得|有机会获得|获奖名单|就有机会中奖|赢取|即可获赠|礼品等着您|圆满落幕|有机会赢得|报名活动|幸运奖品|诚邀您参与|有奖活动|的大力支持|快来报名吧|火热报名中|狂欢季|活动地点|活动内容|快来领取|分享你喜欢的|一起来分享吧|免费领取|活动如下:|活动请关注|关注最新活动|免费试用|线上专供|报名网址|本次活动|活动开始|红包大派送|众多好礼|拿大奖|免费大派发|踊跃投票|欢迎\D{0,1}报名参加|大力支持|活动二|小编注意到|立即申请|即可参与抽奖|敬请期待|有机会赢|有奖竞猜|详情(请)*点击|活动热线|现在就来参加|只要分享|就有机会获得|微博抽奖|获得好礼|即可赢得|有机会获得|获奖名单|就有机会中奖|赢取|即可获赠|礼品等着您|圆满落幕|有机会赢得|报名活动|幸运奖品|活动最后一天|赢取大奖|活动将送|店庆|有机会获得|活动内容详见|等你来挑战|即日起,"
     ),
    ("科普",
     "(①|②|③|1\.|2\.|1\、|2\、|图1|图2|1\)| 2\)|一\、|二\、|三\、)|情感好文|今天就介绍大家|今天就介绍给大家|最新发现|研究指出|数据证明|研究发现|注意事项|小贴士|本款|分享给大家|请关注@|小编给大家推荐"
     ), ("新闻", "据媒体报道|开幕首日|隆重举行|活动现场|启动仪式|商业活动中|娱乐|日发售"),
    ("灌水", "顶上去|66666|牛牛牛|顶一个"),
    ("分享",
     "转走|转载|转发|成功分享到|大家帮忙多转|豪礼|一张图片测试|性格测试题|很火的心理测试|您敢挑战吗|异性眼中的你|招聘测试题|变态测试题|请朋友们留意|原文地址|看客推荐|让你在人群中|别怪我不告诉你|转发此条微博|详情点击|分享一款|供大家参考|给大家分享我|小说|转发此微博|分享赢|最变态招聘测试题"
     ), ("其他", "此用户暂时被停用|宜忌")
]

# For every tab-separated stdin record with at least two fields: denoise the
# first field, then print the record with its fields reversed, followed by
# the denoised first field on its own line.
for raw_line in sys.stdin:
    try:
        text = smart_decode(raw_line)
        text = text.replace("\n", "").replace("\r", "").replace("\\N", "")
        fields = text.split("\t")
        if len(fields) < 2:
            continue
        fields[0] = mother_baby_denoise(fields[0], l)
        print(smart_encode("\t".join(fields[::-1])))
        print(smart_encode(fields[0]))
    except Exception:
        # Emit the traceback (tabs/newlines replaced by spaces) followed by
        # an "ERROR" column.
        flat_trace = traceback.format_exc().replace("\t", " ").replace("\n", " ")
        print(smart_encode("\t".join([flat_trace, "ERROR"])))
Beispiel #12
0
def train_hound():
    """Train a ``Hound`` on the pickled "train_data", save the model and print its info."""
    hound = Hound()
    training_rows = smart_decode(pickle_load("train_data"), cast=True)
    features, targets = split_x_y(training_rows)
    hound.x_train = features
    hound.y_train = targets
    hound.train()
    hound.save_model()
    hound.print_info()
Beispiel #13
0
def gold_miner_hive_test():
    """Run ``GoldMiner`` on the pickled "raw_data": pan, print the data size, then smelt."""
    miner = GoldMiner()
    raw = pickle_load("raw_data")
    miner.data = smart_decode(raw, cast=True)
    miner.pan()
    print(len(miner.data))
    miner.smelt()
Beispiel #14
0
 def get_head(self, head):
     """Return ``smart_decode(head)`` for a list or tuple, otherwise ``None``.

     :param head: header sequence, or any other value (including ``None``)
     :return: the decoded header, or ``None`` when ``head`` is not a list/tuple
     """
     if not isinstance(head, (list, tuple)):
         return None
     return smart_decode(head)
Beispiel #15
0
 def process(data):
     """Normalise ``data`` into decoded rows.

     Lists and tuples are decoded directly; a ``pandas.DataFrame`` is first
     converted with ``values.tolist()``. Any other input yields ``None``.

     :param data: list, tuple or ``pandas.DataFrame``
     :return: ``smart_decode(rows, cast=True)`` or ``None``
     """
     if isinstance(data, (list, tuple)):
         rows = data
     elif isinstance(data, pandas.DataFrame):
         rows = data.values.tolist()
     else:
         return None
     return smart_decode(rows, cast=True)
Beispiel #16
0
# coding: utf-8
# __author__: u"John"
from __future__ import unicode_literals, absolute_import, print_function, division
from mplib.competition import GoldMiner
from mplib.common import smart_decode
import traceback
import sys

if __name__ == "__main__":
    # Python 2 idiom: re-import sys so setdefaultencoding can be called, then
    # make UTF-8 the interpreter-wide default string encoding.
    reload(sys)
    sys.setdefaultencoding("utf8")

    try:
        # Read all of stdin first, then decode each record, strip line breaks
        # and split it into tab-separated fields.
        raw_lines = list(sys.stdin)
        data = [
            smart_decode(raw).replace("\n", "").replace("\r", "").split("\t")
            for raw in raw_lines
        ]
        gm = GoldMiner()
        gm.category_id = sys.argv[1]  # category id comes from the command line
        gm.data = data
        gm.pan()
        gm.refine()
        gm.smelt()

    except Exception:
        # format_exc() returns a multi-line string; flatten tabs and newlines
        # so the error stays a single four-column, tab-separated output line.
        flat_trace = traceback.format_exc().replace("\t", " ").replace("\n", " ")
        print("\t".join([flat_trace, "ERROR", "ERROR", "ERROR"]))