from dip.util import timetool
import sys
import random

reload(sys)
sys.setdefaultencoding("utf-8")

import re
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, ArrayType
import json
import time

# SparkConf, SparkContext and HiveContext are used below but were not imported
# by any of the import lines above; without these imports the script fails
# with NameError unless the launching environment happens to inject the names.
from pyspark import SparkConf, SparkContext
from pyspark.sql import HiveContext

# Spark configuration for this job; only the application name is set here.
conf = SparkConf().setAppName(
    "app_picserversweibof6vwt_wapvideodownload_to_hdfs")

# Spark entry point created from the configuration above.
sc = SparkContext(conf=conf)

# Hive-enabled SQL context layered on top of the SparkContext.
hc = HiveContext(sc)

try:
    source = sc.textFile(
        "/user/hdfs/rawlog/app_picserversweibof6vwt_wapvideodownload/" + timetool.getHDFSDayDir(sys.argv[1]))

    pattern = re.compile("^([^`]*)`([^`]*)")

    def lineParse(line):
        matcher = pattern.match(line)

        if not matcher:
            return None