Example #1
        col = db['TaMember_{cid}_Subscription'.format(cid=cid)]
        ls = col.find({
            'Period': int(period),
            'ChannelId': dictory[channel],
            'SendMode': 0
        })

        for x in ls:
            filters.append(x['MemberId'])
        client1.close()
        logger.add(
            'shopid={shopid}, cid={cid}, et_update_list get Subscription {c}.......'
            .format(shopid=shopid, cid=cid, c=len(filters)))

        merge_path = "/user/athena/{}/meb_attribute/{}".format(shopid, lsdate)
        if not util_hadoop.PathIsExit(merge_path):
            ls = util_hadoop.GetPath(merge_path.rsplit("/", 1)[0])
            lsdate = max(ls).rsplit('/', 1)[1]
            merge_path = "/user/athena/{}/meb_attribute/{}".format(
                shopid, lsdate)
            logger.add(
                'shopid={shopid}, cid={cid}, et_update_list get ta by last date {lsdate}.......'
                .format(shopid=shopid, cid=cid, lsdate=lsdate))

        SparkContext.setSystemProperty('spark.cores.max', '56')
        sc = SparkContext(appName="et_update_list_{}".format(shopid))

        def mapp(l):
            fields = l.split("\t")
            # column 12 is the npt segment; the sentinel value 'cindy1' is mapped to 'S3'
            npt = fields[12] if fields[12] != 'cindy1' else 'S3'
            return {
                'memberid': fields[1],
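Both this snippet and several below use the same fallback: when the requested dated folder is missing, take the newest one under the same parent. A minimal standalone sketch of that selection step, assuming util_hadoop.GetPath returns full child paths whose last segment is a YYYYMMDD date (paths below are illustrative):

def latest_dated_path(paths):
    # YYYYMMDD strings sort chronologically, so max() picks the newest folder
    newest = max(paths)
    return newest, newest.rsplit("/", 1)[1]

path, lsdate = latest_dated_path([
    "/user/athena/shop1/meb_attribute/20200101",
    "/user/athena/shop1/meb_attribute/20200301",
])
# path   -> /user/athena/shop1/meb_attribute/20200301
# lsdate -> 20200301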
Example #2
        for h in ls:
            n = n + 1
            ran = random.random()
            memberid, nes, npt, npd = h
            PR = PRD[npt]
            if npt <= D[storeid][0]:
                hml = "L"
            elif npt > D[storeid][1]:
                hml = "H"
            else:
                hml = "M"
            R.append("{storeid}\t{memberid}\t{nes}\t{hml}\t{npt}\t{PR}\t{order}\t{npd}\t{ran}".format(storeid=storeid, memberid=memberid, nes=nes, hml=hml, npt=npt, PR=PR, order=n, npd=npd, ran=ran))
        return R

    Result = (R1.map(lambda l: (l[0].split('_')[0],
                                (l[0].split('_')[1], l[1][0], float(l[1][1][0]), l[1][1][1])))
                .groupByKey()
                .map(lambda x: (x[0], list(x[1])))
                .map(lambda x: hml(x[0], x[1], D))
                .flatMap(lambda x: x))
    
    serial = Result.count()
    if serial > 0:
        if util_hadoop.PathIsExit(outpath):
            cmd = "hadoop fs -rm -r {}".format(outpath)
            subprocess.call(cmd, shell=True)
        try:
            Result.coalesce(10).saveAsTextFile(outpath)
        except Exception:
            # ignore write failures so the context below is still stopped
            pass
    sc.stop()
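The hml helper here buckets each member's npt value into H, M, or L against per-store thresholds kept in D. A standalone sketch of just that decision, assuming D maps a store id to a (low, high) pair (threshold values below are illustrative):

def bucket(npt, thresholds):
    low, high = thresholds
    if npt <= low:
        return "L"
    if npt > high:
        return "H"
    return "M"

D = {"s01": (0.3, 0.7)}                                 # illustrative thresholds
print([bucket(v, D["s01"]) for v in (0.1, 0.5, 0.9)])   # ['L', 'M', 'H']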
Example #3
    try:
        logger.add(
            'clientid={clientid}, setting={se}, group separation is initializing ....'.
            format(clientid=shopid, se=setting))
        R = getabgroup(setting, D)
        # hdfs paths and spark init
        prefix = "hdfs://{hdfsmaster}:8020".format(hdfsmaster=MIGO_HDFS_MASTER)

        src = prefix + "/user/migo/starterDIY/{}/member/done/*".format(shopid)
        ta_parent = prefix + "/user/athena/{}/ta_done/{}".format(
            shopid, caldate)
        ta_path = ta_parent + "/*"

        # get the latest ta if the requested date is missing
        if not util_hadoop.PathIsExit(ta_parent):
            ls = util_hadoop.GetPath(ta_parent.rsplit("/", 1)[0])
            ta_path = max(ls) + "/*"
            caldate = max(ls).rsplit("/", 1)[1]  # the latest hadoop ta date

        sc = SparkContext(appName="ab_group_init_{}".format(shopid))
        configpath = "/user/athena/{}/et_group/config".format(shopid)

        if util_hadoop.PathIsExit(configpath):
            cmd = "hadoop fs -rm -r {}".format(configpath)
            subprocess.call(cmd, shell=True)

        sc.parallelize(["{}\t{}".format(shopid, setting)]) \
            .coalesce(1).saveAsTextFile(configpath)

        members = sc.textFile(src)
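This example stores the A/B setting as a single-part text file and removes any previous copy first, since saveAsTextFile refuses to write into an existing directory. A minimal sketch of that step; the shop id, setting string, and output path are illustrative, and a local pyspark installation is assumed:

from pyspark import SparkContext

sc = SparkContext(appName="config_write_sketch")
configpath = "/tmp/et_group_config_demo"              # stand-in for the real HDFS config path
line = "{}\t{}".format("shop123", "A:50,B:50")        # shopid <tab> setting, illustrative values
sc.parallelize([line]).coalesce(1).saveAsTextFile(configpath)
sc.stop()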
Example #4
    lsdate = caldate
    UPDATE = True

    try:
        prefix = "hdfs://{hdfsmaster}:8020".format(hdfsmaster=MIGO_HDFS_MASTER)
        merge_path = prefix + "/user/athena/{shop}/meb_attribute/{caldate}".format(shop=shopid, caldate=caldate)
        lsdate = caldate

        logger.add(
            'clientid={clientid}, cid={cid}, et_query is init by {shopid} {caldate} {tag1} {cid} {start} {end} '
            '{type} {flag} {reset} {M} {F} {npt} {age} {gender} {first} {last} {groups} {top} {npd}'.format(
                clientid=shopid, cid=cid, shopid=shopid, caldate=caldate, tag1=tag1, start=start, end=end,
                type=type, flag=flag, reset=reset, M=M, F=F, npt=npt, age=age, gender=gender,
                first=first, last=last, groups=groups, top=top, npd=npd))

        CONTINUE = True
        MSG = ""

        if not util_hadoop.PathIsExit(merge_path):
            if util_hadoop.PathIsExit(merge_path.rsplit("/",1)[0]):
                ls = util_hadoop.GetPath(merge_path.rsplit("/",1)[0])
                merge_path = "hdfs:" + max(ls) + "/*"
                lsdate = max(ls).rsplit("/",1)[1]
            else:
                CONTINUE = False
                MSG = 'member cube is not exists...'
        else:
            merge_path = merge_path + "/*"

        client = pymongo.MongoClient(MIGO_MONGO_TA_URL, 27017)
        db = client['ET_' + shopid]
        col = db['campaign']
        up = False
        if flag == "0":
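The lookup above opens a per-shop Mongo database named ET_<shopid> and reads its campaign collection. A minimal pymongo sketch of that connection pattern; the host and shop id are placeholders (the real code takes the host from MIGO_MONGO_TA_URL):

import pymongo

client = pymongo.MongoClient("mongo-host.example", 27017)   # placeholder for MIGO_MONGO_TA_URL
col = client["ET_" + "shop123"]["campaign"]                 # per-shop database, campaign collection
doc = col.find_one()                                         # fetch one campaign document, if any
client.close()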
Example #5
    "BE_S3_W": "16777216,0,0"
}, {
    "BE_BUY_W": "2097152,0,0"
}]

if __name__ == "__main__":
    """
    error code :    1. ['-1']   => no ta_done file
    """
    try:
        shop = sys.argv[1]
        caldate = sys.argv[2]
        ta = "/user/athena/{shop}/ta_done/{caldate}".format(shop=shop,
                                                            caldate=caldate)
        MSG = ""
        if util_hadoop.PathIsExit(ta):
            start_time = datetime.datetime.now().second

            cf = SparkConf().setAppName("[ET_update_member-{}-{}] {}".format(
                shop, caldate, start_time)).set("spark.cores.max", "40")
            sc = SparkContext(conf=cf)
            raw_ta = sc.textFile(
                ta, 20).filter(lambda x: x.split("\t")[0] == shop).cache()
            R = []

            for x in DICT:
                tag = next(iter(x))                   # the single key of this entry
                tag1 = int(x[tag].split(",")[0])      # first field of the comma-separated value
                if tag1 == 0:
                    counts = raw_ta.filter(
                        lambda line: line.split("\t")[2] == "L7D").count()
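Each DICT entry in this example is a single-key dict whose value is a comma-separated string, and the first field becomes tag1. A standalone sketch of that unpacking, using the two entries shown at the top of the example:

DICT = [{"BE_S3_W": "16777216,0,0"}, {"BE_BUY_W": "2097152,0,0"}]

for entry in DICT:
    tag = next(iter(entry))                  # the single key, e.g. BE_S3_W
    tag1 = int(entry[tag].split(",")[0])     # first field of the value
    print("{} {}".format(tag, tag1))         # BE_S3_W 16777216 / BE_BUY_W 2097152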
Example #6
    # get all ET-enabled clients
    enable_client = []
    cmd = 'curl -s -k {}'.format(eturl)
    try:
        r = os.popen(cmd).read().strip()
        if r != '':
            data = ast.literal_eval(r)
            if data['code'] == 0:
                enable_client = data['result']['enable_company_codes'].split(',')
    except Exception:
        # if the enable-list endpoint cannot be reached or parsed, leave enable_client empty
        pass
 
    if shopid in enable_client:
        #spark init
        if util_hadoop.PathIsExit(os.path.dirname(hdfs)):
            # get the latest ta if the requested date is missing
            if not util_hadoop.PathIsExit(ta_parent):
                ls = util_hadoop.GetPath(ta_parent.rsplit("/",1)[0])
                ta_path = max(ls) + "/*"
                lsdate = max(ls).rsplit("/",1)[1]  # the latest hadoop ta date
        
            # the ta exists at this point
            SparkContext.setSystemProperty('spark.executor.cores', '4')
            SparkContext.setSystemProperty('spark.cores.max', '48')
            sc = SparkContext(appName="et_groups_inc_calculation_member_{}".format(shopid))
            def mapp(line):
                try:
                    return line.split('\t')[3]
                except Exception:
                    return None   # malformed lines yield None
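The enable-client check at the top of this example shells out to curl and parses the Python-literal dict the endpoint returns. A minimal sketch of just the parsing step; the response string below is an assumed example of the payload shape:

import ast

raw = "{'code': 0, 'result': {'enable_company_codes': 'shop1,shop2'}}"   # assumed payload shape
enable_client = []
data = ast.literal_eval(raw)
if data.get('code') == 0:
    enable_client = data['result']['enable_company_codes'].split(',')
print(enable_client)   # ['shop1', 'shop2']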