from dip.util import timetool import sys import random reload(sys) sys.setdefaultencoding("utf-8") import re from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, ArrayType import json import time conf = SparkConf().setAppName( "app_picserversweibof6vwt_wapvideodownload_to_hdfs") sc = SparkContext(conf=conf) hc = HiveContext(sc) try: source = sc.textFile( "/user/hdfs/rawlog/app_picserversweibof6vwt_wapvideodownload/" + timetool.getHDFSDayDir(sys.argv[1])) pattern = re.compile("^([^`]*)`([^`]*)") def lineParse(line): matcher = pattern.match(line) if not matcher: return None