Exemple #1
0
def prepare_data(posfilpath, negfilepath, size, word2index):
    """Build index-set samples and labels from positive/negative files.

    Files are read in pairs (positive first, then negative). Every non-empty
    file becomes one sample: the de-duplicated ``word2index`` values of its
    lower-cased, space-separated words that are present in ``word2index``.

    Args:
        posfilpath: Prefix prepended to each name returned by
            ``common.get_files(posfilpath)``.
        negfilepath: Same, for the negative files.
        size: Requested total number of samples (positive + negative).
        word2index: Mapping from word to index.

    Returns:
        ``(x_train, y_train, sz)``. ``sz`` is ``size`` capped by the number
        of files available, ``y_train`` has shape ``(sz, 1)`` and holds 1 in
        the row of every positive sample of ``x_train``.
    """
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2 * len(posfiles), 2 * len(negfiles), size)

    x_train = []
    y_train = np.zeros((sz, 1))

    def encode(line):
        # Unique indices of the known words of one file.
        known = [word2index[word]
                 for word in line.lower().split(" ")
                 if word in word2index]
        return list(set(known))

    # Iterate over the capped count: using ``size`` here indexed past the end
    # of the file lists whenever fewer files than requested were available.
    for i in range(int(sz / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if posline != "":
            # Label the row the sample actually lands in, so labels stay
            # aligned with x_train even when an earlier file was empty, and
            # a positive file is labelled even if none of its words are known.
            y_train[len(x_train)][0] = 1
            x_train.append(encode(posline))

        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            x_train.append(encode(negline))

    return (x_train, y_train, sz)
Exemple #2
0
def prepare_data(posfilpath, negfilepath, size):
    """Encode positive/negative files as word-index sequences.

    Files are read in pairs (positive first, then negative). Every non-empty
    file is lower-cased, split on single spaces and mapped through
    ``word2index``; words missing from the mapping are dropped.

    ``word2index`` is not a parameter: it is looked up in the enclosing
    (module) scope.

    Args:
        posfilpath: Prefix prepended to each name returned by
            ``common.get_files(posfilpath)``.
        negfilepath: Same, for the negative files.
        size: Requested total number of files (positive + negative).

    Returns:
        ``(x_train, x_concat, sz)``: the per-file index lists, all indices
        concatenated in reading order, and ``size`` capped by the number of
        files available.
    """
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)
    sz = min(2 * len(posfiles), 2 * len(negfiles), size)

    x_train = []
    x_concat = []

    def consume(line):
        # Encode one file and record it in both outputs.
        indices = [word2index[word]
                   for word in line.lower().split(" ")
                   if word in word2index]
        x_concat.extend(indices)
        x_train.append(indices)

    # Iterate over the capped count: using ``size`` here indexed past the end
    # of the file lists whenever fewer files than requested were available.
    for i in range(int(sz / 2)):
        posline = common.get_content(posfilpath + posfiles[i])
        if posline != "":
            consume(posline)

        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            consume(negline)

    return (x_train, x_concat, sz)
Exemple #3
0
def get_funshion_vid(rurl):
    """Return the Funshion media id for ``rurl``.

    The id is taken from the ``m-<digits>`` part of the URL when the URL has
    the ``fun.tv/vplay`` shape; otherwise the page is fetched and the id is
    read from its ``"mediaid"`` field.
    """
    pattern = r'http://www.fun.tv/vplay/.*m-(\d+)'
    if re.match(pattern, rurl):
        return r1(pattern, rurl)
    # The fallback used to fetch an undefined name ``url``; the address to
    # fetch is the function's own argument.
    html = get_content(rurl)
    return r1(r'\"mediaid\":(\d+)', html)
Exemple #4
0
def get_pps_vid(html):
    """Resolve the PPS video id for an address.

    Despite its name, ``html`` is used as an address: it is matched against
    the ``play_<id>.html`` URL shape first, and otherwise handed to
    ``get_content`` so the id can be read from the ``url_key`` field of the
    returned text.
    """
    play_url = r'http://v.pps.tv/play_(.*).html'
    if not re.match(play_url, html):
        page = get_content(html)
        return r1(r'url_key: "(.*)",', page)
    return r1(play_url, html)
Exemple #5
0
def get_urls_by_vid(vid):
    """Return candidate stream URLs for a Letv video id.

    Queries the ``playJson`` API and, for every entry of its ``dispatch``
    table, emits two ``{'rate': ..., 'furls': [url]}`` dicts: the rewritten
    dispatch URL and a variant with the ``tss`` flag flipped between ``ios``
    and ``no``. Each label and URL is also printed.
    """
    tn = get_timestamp()
    key = get_key(tn)
    api = ('http://api.letv.com/mms/out/video/playJson?id={}&platid=1'
           '&splatid=101&format=1&tkey={}&domain=www.letv.com').format(vid, key)
    playurl = json.loads(get_content(api))['playurl']
    domain = playurl['domain'][0]

    urls = []
    for k, entry in playurl['dispatch'].items():
        label = get_rateid(k)[1]
        url = (domain + entry[0] +
               '&retry=1&tag=flash&sign=webdisk_19722818&termid=1&pay=0'
               '&ostype=windows&hwtype=un')
        # 'platid=1' is a substring of 'splatid=101', so this first replace
        # already rewrites both parameters; the second is kept as a backstop.
        url = url.replace('platid=1', 'platid=14')
        url = url.replace('splatid=101', 'splatid=1401')
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(label)
        print(url)
        urls.append({'rate': label, 'furls': [url]})

        if 'tss=ios' in url:
            ano = url.replace('tss=ios', 'tss=no')
        else:
            ano = url.replace('tss=no', 'tss=ios')
        print(ano)
        urls.append({'rate': label, 'furls': [ano]})
    return urls
Exemple #6
0
def get_m1905_vid(html):
    """Resolve the 1905.com video id for an address.

    ``html`` is treated as an address: the id comes from the
    ``/vod/play/<id>.shtml`` URL shape when it matches, otherwise from the
    ``vid : "..."`` field of the text returned by ``get_content``.
    """
    play_url = r"http://www.1905.com/vod/play/(.*).shtml.*"
    if not re.match(play_url, html):
        page = get_content(html)
        return r1(r'vid : "(.*)",', page)
    return r1(play_url, html)
Exemple #7
0
    def collect(self):
        """Walk ``self.parser.source`` breadth-first and sort tagged files.

        For every folder, ``get_content`` supplies its files and sub-folders.
        Sub-folders whose name starts with ``.`` are skipped. Each file whose
        tag can be read with ``TinyTag.get`` is passed to ``self.sort``
        together with its tag; files that fail are skipped.
        """
        from collections import deque

        root = self.parser.source
        root_len = len(root)
        # deque gives O(1) pops from the front; slicing a list copied the
        # whole queue on every iteration.
        pending = deque([root])

        while pending:
            current_folder = pending.popleft()

            # Report the path relative to the root ("/" for the root itself).
            relative = current_folder[root_len:]
            print("[+] Scan {}".format(relative if relative else "/"))

            files, folders = get_content(current_folder)

            # Queue visible sub-folders by full path; skip '.folder' entries.
            for folder in folders:
                if folder and not folder.startswith('.'):
                    pending.append(current_folder + '/' + folder)

            for f in files:
                fp = current_folder + '/' + f  # full path to file
                try:
                    tag = TinyTag.get(fp)
                except LookupError:
                    # Skipped silently, as before (presumably an unsupported
                    # file type -- confirm against TinyTag).
                    continue
                except Exception:
                    # Was a bare ``except:``, which also swallowed
                    # KeyboardInterrupt and SystemExit.
                    print("Cannot get tag from file --> Skip\n\t{}".format(fp))
                    continue

                self.sort(fp, tag)
Exemple #8
0
def get_letv_vid(url):
    """Return the Letv video id for ``url``.

    The id is read from the ``/ptv/vplay/<digits>.html`` URL shape when it
    matches, otherwise from the ``vid="<digits>"`` attribute of the fetched
    page.
    """
    play_url = r'http://www.letv.com/ptv/vplay/(\d+).html'
    if not re.match(play_url, url):
        page = get_content(url)
        return match1(page, r'vid="(\d+)"')
    return match1(url, play_url)
Exemple #9
0
def get_urls_by_vid(vid):
    """Return candidate stream URLs for a Letv video id.

    Queries the ``playJson`` API and, for every entry of its ``dispatch``
    table, emits two ``{'rate': ..., 'furls': [url]}`` dicts: the rewritten
    dispatch URL and a variant with the ``tss`` flag flipped between ``ios``
    and ``no``. Each label and URL is also printed.
    """
    request = ('http://api.letv.com/mms/out/video/playJson?id={}&platid=1'
               '&splatid=101&format=1&tkey={}&domain=www.letv.com')
    tn = get_timestamp()
    key = get_key(tn)
    info = get_content(request.format(vid, key))
    playurl = json.loads(info)['playurl']
    domain = playurl['domain'][0]
    dispatch = playurl['dispatch']

    urls = []
    for k in dispatch:
        path = dispatch[k][0]
        rate = get_rateid(k)
        url = (domain + path +
               '&retry=1&tag=flash&sign=webdisk_19722818&termid=1&pay=0'
               '&ostype=windows&hwtype=un')
        # 'platid=1' is a substring of 'splatid=101', so this first replace
        # already rewrites both parameters; the second is kept as a backstop.
        url = url.replace('platid=1', 'platid=14')
        url = url.replace('splatid=101', 'splatid=1401')
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(rate[1])
        print(url)
        urls.append({'rate': rate[1], 'furls': [url]})

        if 'tss=ios' in url:
            ano = url.replace('tss=ios', 'tss=no')
        else:
            ano = url.replace('tss=no', 'tss=ios')
        print(ano)
        urls.append({'rate': rate[1], 'furls': [ano]})
    return urls
Exemple #10
0
def get_qq_vid(url):
    """Return the v.qq.com video id for ``url``.

    When the URL carries a ``?vid`` query the id is extracted from it;
    otherwise the page is fetched and the id is read from its ``vid:"..."``
    field.
    """
    has_vid_query = re.match(r'http://v.qq.com/([^\?]+)\?vid', url)
    if not has_vid_query:
        page = get_content(url)
        return r1(r'vid:"(.*)"', page)
    return r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url)
Exemple #11
0
    def youku_ups_TV(self):
        """Query the CIBN TV ``ups/get.json`` endpoint and cache the reply.

        Builds the request URL (also stored as ``self.UpsUrl``), fetches it
        and stores the ``data`` object as ``self.api_data``. When the reply
        carries an ``error`` its ``code``/``note`` are copied to
        ``self.api_error_code``/``self.api_error_msg``; when it carries
        ``videos`` the ``list``/``next`` entries are copied to
        ``self.video_list``/``self.video_next``.
        """
        # Query parameters, in order:
        # vid, ccode, client_ip, utid, client_ts, ckey, password
        url = 'https://ups.cp31.ott.cibntv.net/ups/get.json?vid={}&ccode={}'.format(
            self.vid, self.ccode)
        url += '&client_ip=192.168.1.5'
        self.utid = self.fetch_cna()
        url += '&utid=' + self.utid
        url += '&client_ts=' + str(int(time.time()))

        self.ckey = '7B19C0AB12633B22E7FE81271162026020570708D6CC189E4924503C49D243A0DE6CD84A766832C2C99898FC5ED31F3709BB3CDD82C96492E721BDD381735026'
        url += '&ckey=' + urllib.parse.quote(self.ckey)  # percent-encode the key

        # NOTE(review): the original text between '&password=' and
        # "'User-Agent'] = self.ua" was destroyed by a masking filter, leaving
        # a syntax error and an undefined ``headers``. The password attribute
        # name is assumed to be ``self.password``, and any header that was set
        # besides User-Agent (e.g. a Referer) is lost -- confirm upstream.
        if self.password_protected:
            url += '&password=' + self.password
        headers = {'User-Agent': self.ua}

        self.UpsUrl = url
        api_meta = json.loads(get_content(url, headers=headers))

        self.api_data = api_meta['data']
        data_error = self.api_data.get('error')
        if data_error:
            self.api_error_code = data_error.get('code')
            self.api_error_msg = data_error.get('note')

        videos = self.api_data.get('videos') if 'videos' in self.api_data else None
        if videos is not None:
            if 'list' in videos:
                self.video_list = videos['list']
            if 'next' in videos:
                self.video_next = videos['next']
Exemple #12
0
def get_pps_vid(html):
    """Return the PPS video id for the address passed as ``html``.

    A ``play_<id>.html`` address yields its id directly; any other value is
    fetched with ``get_content`` and the id is taken from the ``url_key``
    field of the result.
    """
    url_shape = r'http://v.pps.tv/play_(.*).html'
    matches_url = re.match(url_shape, html)
    if matches_url:
        return r1(url_shape, html)
    fetched = get_content(html)
    return r1(r'url_key: "(.*)",', fetched)
Exemple #13
0
def get_m1905_vid(html):
    """Return the 1905.com video id for the address passed as ``html``.

    A ``/vod/play/<id>.shtml`` address yields its id directly; any other
    value is fetched with ``get_content`` and the id is taken from the
    ``vid : "..."`` field of the result.
    """
    url_shape = r'http://www.1905.com/vod/play/(.*).shtml.*'
    matches_url = re.match(url_shape, html)
    if matches_url:
        return r1(url_shape, html)
    fetched = get_content(html)
    return r1(r'vid : "(.*)",', fetched)
Exemple #14
0
    def youku_ups(self):
        """Query the Youku ``ups/get.json`` endpoint and cache the reply.

        Builds the request URL (also stored as ``self.UpsUrl``), fetches it
        and stores the ``data`` object as ``self.api_data``. When the reply
        carries an ``error`` its ``code``/``note`` are copied to
        ``self.api_error_code``/``self.api_error_msg``; when it carries
        ``videos`` the ``list``/``next`` entries are copied to
        ``self.video_list``/``self.video_next``.
        """
        # Query parameters, in order:
        # vid, ccode, client_ip, utid, client_ts, ckey, password
        url = 'https://ups.youku.com/ups/get.json?vid={}&ccode={}'.format(
            self.vid, self.ccode)
        url += '&client_ip=192.168.1.2'

        self.utid = self.fetch_cna()
        url += '&utid=' + self.utid
        url += '&client_ts=' + str(int(time.time()))

        self.ckey = 'DIl58SLFxFNndSV1GFNnMQVYkx1PP5tKe1siZu/86PR1u/Wh1Ptd+WOZsHHWxysSfAOhNJpdVWsdVJNsfJ8Sxd8WKVvNfAS8aS8fAOzYARzPyPc3JvtnPHjTdKfESTdnuTW6ZPvk2pNDh4uFzotgdMEFkzQ5wZVXl2Pf1/Y6hLK0OnCNxBj3+nb0v72gZ6b0td+WOZsHHWxysSo/0y9D2K42SaB8Y/+aD2K42SaB8Y/+ahU+WOZsHcrxysooUeND'
        url += '&ckey=' + urllib.parse.quote(self.ckey)  # percent-encode the key

        # NOTE(review): the original text between '&password=' and
        # "'User-Agent'] = self.ua" was destroyed by a masking filter, leaving
        # a syntax error and an undefined ``headers``. The password attribute
        # name is assumed to be ``self.password``, and any header that was set
        # besides User-Agent (e.g. a Referer) is lost -- confirm upstream.
        if self.password_protected:
            url += '&password=' + self.password
        headers = {'User-Agent': self.ua}

        self.UpsUrl = url
        api_meta = json.loads(get_content(url, headers=headers))

        self.api_data = api_meta['data']
        data_error = self.api_data.get('error')
        if data_error:
            self.api_error_code = data_error.get('code')
            self.api_error_msg = data_error.get('note')

        videos = self.api_data.get('videos') if 'videos' in self.api_data else None
        if videos is not None:
            if 'list' in videos:
                self.video_list = videos['list']
            if 'next' in videos:
                self.video_next = videos['next']
Exemple #15
0
def get_funshion_vid(rurl):
    """Return the Funshion media id for ``rurl``.

    A ``fun.tv/vplay`` URL yields the digits after ``m-``; for any other URL
    the page is fetched and the id is read from its ``"mediaid"`` field.
    """
    url_shape = r'http://www.fun.tv/vplay/.*m-(\d+)'
    if re.match(url_shape, rurl):
        vid = r1(url_shape, rurl)
    else:
        # Fetch the argument itself: the name ``url`` used here before is not
        # defined in this function.
        html = get_content(rurl)
        vid = r1(r'\"mediaid\":(\d+)', html)
    return vid
Exemple #16
0
def get_vkey_by_id(vid,idx,fmt):
    """Return ``<fn>?vkey=<key>`` for one clip of a v.qq.com video.

    Fetches the ``getclip`` XML for (``vid``, ``idx``, ``fmt``) and joins the
    text of its ``vi/fn`` and ``vi/key`` elements.
    """
    clip_url = 'http://vv.video.qq.com/getclip?vid={}&idx={}&fmt={}'.format(vid, idx, fmt)
    tree = ET.fromstring(get_content(clip_url))
    file_name = tree.find('vi/fn').text
    key = tree.find('vi/key').text
    return file_name + '?vkey=' + key
Exemple #17
0
def get_fun_allurls(vid,playnum):
    """Return the stream URLs of one episode of a Funshion media item.

    The media JSON for ``vid`` is scanned for episode numbers and for the
    ``tv``, ``dvd`` and ``highdvd`` URL lists. When there are as many numbers
    as ``tv`` URLs, the episode equal to ``playnum`` selects the position
    (first position if none matches); otherwise the first position is used.
    One ``{'rate': ..., 'furls': [url]}`` dict is produced for each quality
    that has an entry at that position. An empty list is returned when no
    episode numbers are found.
    """
    info = get_content('http://jsonfe.funshion.com/media/?cli=ipad&ver=2.0.0.1&ta=0&mid={}'.format(vid))
    number = r2(r'"number":"(\d*)",',info)
    # Formatting into one string prints the same text on Python 2 and 3.
    print('%s %s' % (len(number), number))

    # (quality name, URLs found) in the order they are reported.
    streams = [
        ('tv', r2('\"tv\":{\"url\":\"(.*?)\"', info)),
        ('dvd', r2('\"dvd\":{\"url\":\"(.*?)\"', info)),
        ('highdvd', r2('\"highdvd\":{\"url\":\"(.*?)\"', info)),
    ]
    for _, found in streams:
        print('%s %s' % (len(found), found))

    if len(number) == 0:
        return []

    pos = 0
    if len(number) == len(streams[0][1]):
        # The numbers come from a regex while callers may pass an int (the
        # play-number helper falls back to the integer 1), so compare as text.
        wanted = str(playnum)
        for i, num in enumerate(number):
            if num == wanted:
                pos = i
                break

    urls = []
    for quality, found in streams:
        if pos < len(found):
            urls.append({
                'rate': get_clarity(quality),
                'furls': [found[pos].replace('\\', '')],  # drop JSON escapes
            })
    return urls
Exemple #18
0
def get_qq_vid(url):
    """Extract the v.qq.com video id for ``url``.

    URLs with a ``?vid`` query are parsed directly; for all others the page
    is fetched and its ``vid:"..."`` field is used.
    """
    if re.match(r'http://v.qq.com/([^\?]+)\?vid', url) is None:
        return r1(r'vid:"(.*)"', get_content(url))
    return r1(r'http://v.qq.com/[^\?]+\?vid=(\w+)', url)
Exemple #19
0
def get_letv_vid(url):
    """Extract the Letv video id for ``url``.

    ``/ptv/vplay/<digits>.html`` URLs are parsed directly; for all others
    the page is fetched and its ``vid="<digits>"`` attribute is used.
    """
    url_shape = r'http://www.letv.com/ptv/vplay/(\d+).html'
    if re.match(url_shape, url) is None:
        return match1(get_content(url), r'vid="(\d+)"')
    return match1(url, url_shape)
Exemple #20
0
def getVMS(tvid, vid, uid):
    """Fetch and decode the iQiyi VMS description of a video.

    The request carries a random ``tm`` in [1000, 2000], an ``enc`` value
    that is the MD5 hex digest of ``'ts56gh' + tm + tvid``, and a random
    ``tn``.

    Returns:
        The JSON-decoded reply of the ``cache.video.qiyi.com/vms`` request.
    """
    tm = randint(1000, 2000)
    # hashlib needs bytes: ``bytes(text)`` without an encoding raises
    # TypeError on Python 3, so encode explicitly.
    enc = hashlib.md5(('ts56gh' + str(tm) + tvid).encode('utf-8')).hexdigest()
    vmsreq = ('http://cache.video.qiyi.com/vms?key=fvip&src=p' +
              "&tvId=" + tvid +
              "&vid=" + vid +
              "&vinfo=1&tm=" + str(tm) +
              "&enc=" + enc +
              "&qyid=" + uid +
              "&tn=" + str(random()))
    return json.loads(get_content(vmsreq))
Exemple #21
0
def get_m1905_m3u8(vid):
    """Return the decoded iOS playlist address of a 1905.com video.

    The ``getmediainfo`` reply's ``iosurl`` field is base64-decoded. Any
    failure is printed and reported as ``None``.
    """
    try:
        url = "http://www.1905.com/api/video/getmediainfo.php?id={}&type=0&source_key=m3u8ipad".format(vid)
        con = get_content(url)
        m3url = r1(r'"iosurl":"(.*?)",', con)
        # b64decode replaces decodestring, which no longer exists in
        # Python 3.9+.
        m3u8 = base64.b64decode(m3url)
    except Exception as e:
        print(e)
        m3u8 = None
    # The result used to be computed and then dropped, so every call
    # yielded None.
    return m3u8
Exemple #22
0
def get_content(self):
    """Describe the current request through ``common.get_content``.

    Passes ``proto[0]``, the version/method/path summary, both socket
    endpoints of ``self.connection`` and ``self.headers``, and returns
    whatever ``common.get_content`` returns.
    """
    version = "%s -> %s" % (self.request_version, self.server_version)
    infos = [("Version", version)]
    infos.append(("Method", self.command))
    infos.append(("Path", self.path))

    protocol = proto[0]
    peer = self.connection.getpeername()
    local = self.connection.getsockname()
    return common.get_content(protocol, infos, peer, local, self.headers)
Exemple #23
0
def get_vid(url):
    """Return the Sohu video id found in the page at ``url``.

    The id is taken from the first ``share.vrs.sohu.com/<id>/`` occurrence;
    when the page has none, the ``vid="...";`` assignment is used instead.
    """
    html = get_content(url)
    try:
        vid = re.search("share.vrs.sohu.com/(.*?)/", html).group(1)
    except (AttributeError, TypeError):
        # AttributeError: no match (``None.group``); TypeError: ``html`` is
        # not text. The former bare ``except:`` also swallowed
        # KeyboardInterrupt and SystemExit.
        vid = r1(r'vid="(.*)";',html)
    return vid
Exemple #24
0
def get_vkey_by_id(vid, idx, fmt):
    """Build the ``<fn>?vkey=<key>`` suffix for one v.qq.com clip.

    The ``getclip`` XML for (``vid``, ``idx``, ``fmt``) supplies both parts:
    the text of ``vi/fn`` and of ``vi/key``.
    """
    template = 'http://vv.video.qq.com/getclip?vid={}&idx={}&fmt={}'
    reply = get_content(template.format(vid, idx, fmt))
    doc = ET.fromstring(reply)
    parts = [doc.find('vi/fn').text, '?vkey=', doc.find('vi/key').text]
    return parts[0] + parts[1] + parts[2]
Exemple #25
0
 def get_vid_from_page(self):
     """Fetch ``self.url`` and read the video id from the page.

     The page text is stored in ``self.page``. When it contains a
     ``videoId2: '<id>'`` entry, the id is stored in ``self.vid``; otherwise
     ``self.vid`` is left untouched.

     Raises:
         Exception: ``self.url`` is empty or unset.
     """
     if not self.url:
         raise Exception('No url')
     self.page = get_content(self.url)
     # The pattern used to be bound to a local named ``str``, hiding the
     # builtin; an unused base64 pattern was dropped as well.
     hit = re.search('videoId2: \'(.+)\'', self.page)
     if hit is not None:
         self.vid = hit.group(1)
Exemple #26
0
def get_vid(url):
    """Return the Sohu video id found in the page at ``url``.

    Prefers the id embedded in a ``share.vrs.sohu.com/<id>/`` link and falls
    back to the ``vid="...";`` assignment when no such link is present.
    """
    html = get_content(url)
    pattern = re.compile("share.vrs.sohu.com/(.*?)/")
    try:
        return pattern.search(html).group(1)
    except (AttributeError, TypeError):
        # AttributeError: no match (``None.group``); TypeError: ``html`` is
        # not text. The former bare ``except:`` also swallowed
        # KeyboardInterrupt and SystemExit.
        return r1(r'vid="(.*)";', html)
Exemple #27
0
def get_news_url_by_vid(vid):
    """Return the iPad playback URL for a Sina news video id.

    The ``video_ids`` interface maps ``vid`` to its ``ipad_vid``, which is
    then placed into the ``v_play_ipad`` address. Any failure is printed and
    reported as ``None``.
    """
    try:
        url = "http://video.sina.com.cn/interface/video_ids/video_ids.php?v={}".format(vid)
        html = json.loads(get_content(url))
        newsvid = html["ipad_vid"]
        url = "http://v.iask.com/v_play_ipad.php?vid={}&tags=newsList_web".format(newsvid)
    except Exception as e:
        print(e)
        url = None
    # The result used to be computed and then dropped, so every call
    # yielded None.
    return url
Exemple #28
0
def get_m1905_m3u8(vid):
    """Return the decoded iOS playlist address of a 1905.com video.

    Reads the ``iosurl`` field of the ``getmediainfo`` reply and
    base64-decodes it. Returns ``None`` (after printing the error) when
    anything goes wrong.
    """
    api = 'http://www.1905.com/api/video/getmediainfo.php?id={}&type=0&source_key=m3u8ipad'
    try:
        con = get_content(api.format(vid))
        m3url = r1(r'"iosurl":"(.*?)",',con)
        # b64decode replaces decodestring, which no longer exists in
        # Python 3.9+.
        return base64.b64decode(m3url)
    except Exception as e:
        print(e)
        # Previously the function had no return at all, so callers always
        # received None even on success.
        return None
Exemple #29
0
def get_funshion_playnum(rurl):
    """Return the episode number for a Funshion play URL.

    Sources, in order: the ``e-<digits>`` part of ``rurl``, the
    ``minfo.playNumber`` assignment in the fetched page, and finally the
    default ``1``.

    Note: the default is the integer ``1`` while the other sources are
    whatever ``r1`` yields (presumably text -- confirm), so callers should
    normalise the type before comparing.
    """
    # Raw strings: ``\d`` in a plain literal is an invalid escape sequence.
    playNum = r1(r'http://www.fun.tv/vplay/.*m-\d+.e-(\d+)', rurl)
    # Formatting into one string prints the same text on Python 2 and 3.
    print('playNum %s' % (playNum,))
    if playNum is None:
        html = get_content(rurl)
        playNum = r1(r"minfo.playNumber = '(\d+)';", html)
    if playNum is None:
        playNum = 1
        print('playNum2 %s' % (playNum,))
    return playNum
Exemple #30
0
def get_funshion_playnum(rurl):
    """Return the episode number for a Funshion play URL.

    The ``e-<digits>`` part of ``rurl`` is preferred; failing that, the
    page is fetched and its ``minfo.playNumber`` assignment is used; failing
    that too, ``1`` is returned.

    Note: the default is the integer ``1`` while the other sources are
    whatever ``r1`` yields (presumably text -- confirm).
    """
    # Raw strings: ``\d`` in a plain literal is an invalid escape sequence.
    from_url = r1(r'http://www.fun.tv/vplay/.*m-\d+.e-(\d+)', rurl)
    # Formatting into one string prints the same text on Python 2 and 3.
    print('playNum %s' % (from_url,))
    if from_url is not None:
        return from_url

    html = get_content(rurl)
    from_page = r1(r"minfo.playNumber = '(\d+)';", html)
    if from_page is not None:
        return from_page

    playNum = 1
    print('playNum2 %s' % (playNum,))
    return playNum
Exemple #31
0
def get_video(vid, stream_type=None):
    """Return ``(url, size)`` pairs for every segment of a Youku video.

    Args:
        vid: Video id placed into the ``getPlayList`` request.
        stream_type: One of ``'hd3'``, ``'hd2'``, ``'mp4'``, ``'flv'``. When
            falsy, the first of these (in that order) present in the reply's
            ``segs`` is used.

    Returns:
        A list with one ``(url, int(size))`` tuple per segment of the chosen
        stream.

    Raises:
        NotImplementedError: no ``stream_type`` was given and the reply
            offers none of the known stream types.
        AssertionError: ``stream_type`` is not one of the known types.
    """
    url = "http://v.youku.com/player/getPlayList/VideoIDS/{}/Pf/4/ctype/12/ev/1".format(
        vid)
    info = json.loads(get_content(url))
    data = info['data'][0]
    segs = data['segs']
    if not stream_type:
        for x in ['hd3', 'hd2', 'mp4', 'flv']:
            if x in segs:
                stream_type = x
                break
        else:
            raise NotImplementedError()
    assert stream_type in ('hd3', 'hd2', 'mp4', 'flv')
    # Formatting into one string prints the same text on Python 2 and 3.
    print('stream_type %s' % (stream_type,))
    file_type = {
        'hd3': 'flv',
        'hd2': 'flv',
        'mp4': 'mp4',
        'flv': 'flv'
    }[stream_type]

    # Shuffle the alphabet with a generator seeded by the reply's ``seed``;
    # the shuffled string is the lookup table for the file-id digits below.
    seed = data['seed']
    source = list(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890"
    )
    mixed = ''
    while source:
        seed = (seed * 211 + 30031) & 0xFFFF
        index = seed * len(source) >> 16
        mixed += source.pop(index)

    # ``streamfileids`` is a '*'-terminated list of positions into ``mixed``.
    ids = data['streamfileids'][stream_type].split('*')[:-1]
    vid = ''.join(mixed[int(i)] for i in ids)

    sid = '%s%s%s' % (int(time.time() * 1000), randint(
        1000, 1999), randint(1000, 9999))

    query = get_segurls_param(data['ep'], data['ip'])
    print(query)
    urls = []
    for s in segs[stream_type]:
        no = '%02x' % int(s['no'])
        url = 'http://k.youku.com/player/getFlvPath/sid/%s_%s/st/%s/fileid/%s%s%s?K=%s&ts=%s' % (
            sid, no, file_type, vid[:8], no.upper(), vid[10:], s['k'],
            s['seconds'])
        # NOTE(review): ``query`` is only printed; the returned URL does not
        # include it -- confirm this is intended.
        print(url + '&' + query)
        urls.append((url, int(s['size'])))
    return urls
Exemple #32
0
def get_video_flv_complete_by_id(vid):
    """Return the complete FLV URL of a v.qq.com video.

    The ``getinfo`` XML for ``vid`` supplies the base URL
    (``vl/vi/ul/ui/url``), the file name (``vl/vi/fn``) and the key
    (``vl/vi/fvkey``). Any failure is printed and reported as ``None``.
    """
    try:
        info = get_content('http://vv.video.qq.com/getinfo?vids={}&otype=xml&defaultfmt=flv'.format(vid))
        root = ET.fromstring(info)
        fn = root.find('vl/vi/fn').text
        fvkey = root.find('vl/vi/fvkey').text
        ui = root.find('vl/vi/ul/ui/url').text
        url = ui+fn+'?vkey='+fvkey
    except Exception as e:
        print(e)
        url = None
    # The result used to be computed and then dropped, so every call
    # yielded None.
    return url
Exemple #33
0
def get_pps_urls_by_id(vid):
    """Collect the PPS play URLs of ``vid`` for stream types 0 and 1.

    For each type the CDN endpoint is queried; replies containing ``pfv``
    contribute a ``{'rate': ..., 'furls': [url]}`` dict whose URL is the
    part of the reply before ``&all``.
    """
    endpoint = 'http://dp.ugc.pps.tv/get_play_url_cdn.php?sid={}&flash_type=1&type={}'
    found = []
    for kind in (0, 1):
        reply = get_content(endpoint.format(vid, kind))
        if 'pfv' not in reply:
            continue
        link = r1(r'(.*)&all.*',reply)
        label = get_pps_rate(str(kind))
        found.append({'rate': label, 'furls': [link]})
    return found
Exemple #34
0
def get_news_url_by_vid(vid):
    """Return the iPad playback URL for a Sina news video id.

    Looks up the ``ipad_vid`` belonging to ``vid`` and builds the
    ``v_play_ipad`` address from it. Returns ``None`` (after printing the
    error) when the lookup fails.
    """
    lookup = 'http://video.sina.com.cn/interface/video_ids/video_ids.php?v={}'
    player = 'http://v.iask.com/v_play_ipad.php?vid={}&tags=newsList_web'
    try:
        ids = json.loads(get_content(lookup.format(vid)))
        return player.format(ids['ipad_vid'])
    except Exception as e:
        print(e)
        # Previously the function had no return at all, so callers always
        # received None even on success.
        return None
Exemple #35
0
def get_urls(html):
    """Return the Wasu stream URL for a page, wrapped by ``parser2dic``.

    The video id and the key suffix are derived from ``html`` and sent to
    the ``getVideoUrl`` API; the text of the reply's ``video`` element is
    the stream URL. On any failure the URL is the literal ``'Error'``.
    """
    try:
        vid = get_wasu_id(html)
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(vid)
        suffix = get_suffix_by_html(html)
        url = 'http://www.wasu.cn/Api/getVideoUrl/id/' + vid + suffix
        info = get_content(url)
        root = ET.fromstring(info)
        url = root.find('video').text
        print(url)
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        url = 'Error'
    return parser2dic([{'rate': '标清', 'furls': [url]}])
Exemple #36
0
def get_video_mp4_complete_by_id(vid):
    """Return the complete MP4 URL of a v.qq.com video.

    The ``getinfo`` XML for ``vid`` supplies the base URL
    (``vl/vi/ul/ui/url``), the file name (``vl/vi/fn``) and the key
    (``vl/vi/fvkey``). Any failure is printed and reported as ``None``.
    """
    try:
        info = get_content(
            'http://vv.video.qq.com/getinfo?vids={}&otype=xml&defaultfmt=mp4'.
            format(vid))
        root = ET.fromstring(info)
        fn = root.find('vl/vi/fn').text
        fvkey = root.find('vl/vi/fvkey').text
        ui = root.find('vl/vi/ul/ui/url').text
        url = ui + fn + '?vkey=' + fvkey
    except Exception as e:
        print(e)
        url = None
    # The result used to be computed and then dropped, so every call
    # yielded None.
    return url
Exemple #37
0
def get_pps_urls_by_id(vid):
    """Return the PPS play URLs available for ``vid``.

    Stream types 0 and 1 are queried in turn. A reply that contains ``pfv``
    yields one ``{'rate': ..., 'furls': [url]}`` dict, the URL being the
    text in front of ``&all``.
    """
    results = []
    type_id = 0
    while type_id < 2:
        address = (
            'http://dp.ugc.pps.tv/get_play_url_cdn.php?sid={}&flash_type=1&type={}'
            .format(vid, type_id))
        body = get_content(address)
        if 'pfv' in body:
            entry = {}
            play_url = r1(r'(.*)&all.*', body)
            entry['rate'] = get_pps_rate(str(type_id))
            entry['furls'] = [play_url]
            results.append(entry)
        type_id += 1
    return results
Exemple #38
0
def get_urls(html):
    """Return the Wasu stream URL for a page, wrapped by ``parser2dic``.

    Builds the ``getVideoUrl`` request from the video id and key suffix of
    ``html`` and reads the stream URL from the reply's ``video`` element.
    When any step fails the URL is the literal ``"Error"``.
    """
    url = "Error"
    try:
        vid = get_wasu_id(html)
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(vid)
        suffix = get_suffix_by_html(html)
        info = get_content("http://www.wasu.cn/Api/getVideoUrl/id/" + vid + suffix)
        stream = ET.fromstring(info).find("video").text
        print(stream)
        url = stream
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit.
        url = "Error"
    return parser2dic([{"rate": "标清", "furls": [url]}])
Exemple #39
0
def get_content(s, request, context):
    """Describe the current gRPC call through ``common.get_content``.

    The reported method is the name of the function that called this one.
    The invocation metadata of ``context`` is flattened into a key/value
    dict and passed along with the peer and the local host IP.
    """
    # Must stay in this function's own body: frame 1 is our direct caller.
    caller_name = sys._getframe(1).f_code.co_name
    infos = [("Service", "-helloworld.Greeter"), ("Method", caller_name)]

    headers = dict((item.key, item.value)
                   for item in context.invocation_metadata())

    peer = context.peer()
    host = get_host_ip()
    return common.get_content("grpc", infos, peer, host, headers)
Exemple #40
0
def get_video_sections_by_id(vid, fmt):
    """Return the URLs of all sections of a v.qq.com video.

    The ``getinfo`` XML gives the section count (``vl/vi/cl/fc``) and the
    base URL (``vl/vi/ul/ui/url``); each section's suffix comes from
    ``get_vkey_by_id``. Any failure is printed and reported as ``None``.
    """
    try:
        xml = get_content('http://vv.video.qq.com/getinfo?vids=%s' % vid +'&defaultfmt=%s' % fmt)
        root = ET.fromstring(xml)
        num = root.find('vl/vi/cl/fc').text
        ui = root.find('vl/vi/ul/ui/url').text
        urls = []
        # Sections are numbered from 1.
        for i in range(1,int(num)+1):
            urls.append(ui + get_vkey_by_id(vid, i, fmt))
    except Exception as e:
        print(e)
        urls = None
    # The result used to be computed and then dropped, so every call
    # yielded None.
    return urls
Exemple #41
0
def prepare_data(posfilpath, negfilepath, size):
    """Vectorise positive/negative files and keep their raw text.

    Files are read in pairs (positive first, then negative). Every non-empty
    file is lower-cased; its space-separated words go through
    ``makesentvec`` and the lower-cased text itself is kept alongside.

    Args:
        posfilpath: Prefix prepended to each name returned by
            ``common.get_files(posfilpath)``.
        negfilepath: Same, for the negative files.
        size: Requested total number of files (positive + negative).

    Returns:
        ``(reviews, raw_sent)``: the ``makesentvec`` results and the matching
        lower-cased texts, in reading order.
    """
    posfiles = common.get_files(posfilpath)
    negfiles = common.get_files(negfilepath)

    reviews = []
    raw_sent = []

    def consume(line):
        # Record one file in both outputs.
        text = line.lower()
        reviews.append(makesentvec(text.split(" ")))
        raw_sent.append(text)

    # Never read more pairs than there are files: ``size`` alone indexed
    # past the end of the lists when fewer files were available.
    pairs = min(int(size / 2), len(posfiles), len(negfiles))
    for i in range(pairs):
        posline = common.get_content(posfilpath + posfiles[i])
        if posline != "":
            consume(posline)

        negline = common.get_content(negfilepath + negfiles[i])
        if negline != "":
            consume(negline)

    return (reviews, raw_sent)
Exemple #42
0
def get_real_urls(video_links,gen_uid,info):
    """Resolve every entry of ``video_links`` to its real download URL.

    For each link, the dispatch key of its file stem is inserted before the
    last component of ``info["data"]["vp"]["du"]``; the resulting address is
    fetched and the ``"l"`` field of the JSON reply is collected.
    """
    resolved = []
    for link in video_links:
        path = link["l"]
        if not path.startswith("/"):
            # Links that are not absolute paths are encoded.
            path = getVrsEncodeCode(path)
        assert path.endswith(".f4v")

        stem = path.split("/")[-1].split(".")[0]
        key = getDispathKey(stem)
        segments = info["data"]["vp"]["du"].split("/")
        segments.insert(-1, key)

        query = '?su=' + gen_uid + '&client=&z=&bt=&ct=&tn=' + str(randint(10000,20000))
        address = "/".join(segments) + path + query
        reply = json.loads(get_content(address))
        resolved.append(reply["l"])
    return resolved
Exemple #43
0
def get_kankan_mparam(gcid,param):
    """Return one field of the Kankan CDN description of ``gcid``.

    ``param`` selects the field: ``'param1'`` or ``'param2'`` return the
    corresponding value of the reply; ``'url'`` and any other value return
    ``http://<ip>/<path>`` built from the reply.
    """
    reply = get_content('http://mp4.cl.kankan.com/getCdnresource_flv?gcid={}'.format(gcid))
    host = r1(r'ip:"(.*?)"',reply)
    location = r1(r'path:"(.*?)"',reply)
    address = 'http://' + host +'/'+ location
    first = r1(r'param1:(.*),',reply)
    second = r1(r'param2:(.*)}',reply)

    if param == 'param1':
        return first
    if param == 'param2':
        return second
    # 'url' and every unrecognised selector yield the address.
    return address
Exemple #44
0
def get_video_sections_by_id(vid, fmt):
    """Return the URLs of all sections of a v.qq.com video.

    Reads the section count (``vl/vi/cl/fc``) and base URL
    (``vl/vi/ul/ui/url``) from the ``getinfo`` XML and appends the suffix
    from ``get_vkey_by_id`` for each section. Returns ``None`` (after
    printing the error) when anything fails.
    """
    try:
        xml = get_content('http://vv.video.qq.com/getinfo?vids=%s' % vid +
                          '&defaultfmt=%s' % fmt)
        root = ET.fromstring(xml)
        count = int(root.find('vl/vi/cl/fc').text)
        base = root.find('vl/vi/ul/ui/url').text
        # Sections are numbered from 1.
        return [base + get_vkey_by_id(vid, i, fmt)
                for i in range(1, count + 1)]
    except Exception as e:
        print(e)
        # Previously the function had no return at all, so callers always
        # received None even on success.
        return None
Exemple #45
0
def get_kankan_mparam(gcid, param):
    """Look up one value in the Kankan CDN description of ``gcid``.

    Returns the reply's ``param1`` or ``param2`` when ``param`` names one of
    them, and the ``http://<ip>/<path>`` address for ``'url'`` or anything
    else.
    """
    source = 'http://mp4.cl.kankan.com/getCdnresource_flv?gcid={}'
    info = get_content(source.format(gcid))

    ip = r1(r'ip:"(.*?)"', info)
    path = r1(r'path:"(.*?)"', info)
    url = 'http://' + ip + '/' + path
    extras = (
        ('param1', r1(r'param1:(.*),', info)),
        ('param2', r1(r'param2:(.*)}', info)),
    )

    for name, value in extras:
        if param == name:
            return value
    return url
Exemple #46
0
def get_m1905_urls(vid):
    """Collect the stream URLs known for a 1905.com video id.

    The m3u8 address (when available) comes first, followed by one entry
    for each of the ``url``, ``sdurl``, ``bkurl`` and ``hdurl`` attributes
    present on the ``playlist/item`` element of the video's profile XML.
    """
    collected = []
    playlist = get_m1905_m3u8(vid)
    if playlist is not None:
        collected.append({"rate": "标清", "furls": [playlist]})

    # The profile path is keyed by the first two digits of the id.
    first_digit = r1(r"(\d).*", vid)
    second_digit = r1(r"\d(\d).*", vid)
    profile = get_content(
        "http://static.m1905.cn/profile/vod/{}/{}/{}_1.xml".format(first_digit, second_digit, vid))
    attributes = ET.fromstring(profile).find("playlist/item").attrib

    wanted = ["url", "sdurl", "bkurl", "hdurl"]
    for name in attributes:
        if name not in wanted:
            continue
        entry = {"rate": get_clarity(name)}
        entry["furls"] = [attributes[name]]
        collected.append(entry)
    return collected
Exemple #47
0
def get_m1905_urls(vid):
    """Return every stream URL found for a 1905.com video id.

    Starts with the m3u8 address when ``get_m1905_m3u8`` provides one, then
    adds the ``url``/``sdurl``/``bkurl``/``hdurl`` attributes found on the
    ``playlist/item`` element of the profile XML, each labelled through
    ``get_clarity``.
    """
    result = []
    m3u8 = get_m1905_m3u8(vid)
    if m3u8 is not None:
        result.append({'rate':'标清','furls':[m3u8]})

    # The profile path is keyed by the first two digits of the id.
    d1 = r1(r'(\d).*',vid)
    d2 = r1(r'\d(\d).*',vid)
    xml_text = get_content('http://static.m1905.cn/profile/vod/{}/{}/{}_1.xml'.format(d1,d2,vid))
    item = ET.fromstring(xml_text).find('playlist/item')
    attrs = item.attrib

    for key in attrs:
        if key in ('url','sdurl','bkurl','hdurl'):
            label = get_clarity(key)
            result.append({'rate': label, 'furls': [attrs[key]]})
    return result
Exemple #48
0
def get_real_urls(video_links, gen_uid, info):
    """Return the real download URL of every entry in ``video_links``.

    Each entry's ``"l"`` path (decoded first when it is not absolute) is
    combined with ``info["data"]["vp"]["du"]``, into which the dispatch key
    of the file stem is inserted before the last component. The address is
    fetched and the ``"l"`` field of the JSON reply is kept.
    """
    def locate(entry):
        vlink = entry["l"]
        if not vlink.startswith("/"):
            # Links that are not absolute paths are encoded.
            vlink = getVrsEncodeCode(vlink)
        assert vlink.endswith(".f4v")
        file_stem = vlink.split("/")[-1].split(".")[0]
        key = getDispathKey(file_stem)
        base = info["data"]["vp"]["du"].split("/")
        base.insert(-1, key)
        tail = '?su=' + gen_uid + '&client=&z=&bt=&ct=&tn=' + str(
            randint(10000, 20000))
        return "/".join(base) + vlink + tail

    urls = []
    for entry in video_links:
        urls.append(json.loads(get_content(locate(entry)))["l"])
    return urls
Exemple #49
0
def get_cntv_urls_by_id(pid):
    """Return the stream URLs CNTV reports for ``pid``.

    The HLS URL (when non-empty) comes first. Then, for each of the
    ``chapters2``, ``lowChapters`` and ``chapters`` keys present in the
    reply's ``video`` object, one entry lists the ``url`` of every segment.
    """
    reply = get_content('http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + pid)
    info = json.loads(reply)

    result = []
    hls = info['hls_url']
    if hls != '':
        result.append({'rate':'标清','furls':[hls]})

    video = info['video']
    for name in video:
        if name not in ("chapters2","lowChapters","chapters"):
            continue
        segments = video[name]
        label = get_clarity(name)
        links = [segments[n]['url'] for n in range(len(segments))]
        result.append({'rate': label, 'furls': links})
    return result
Exemple #50
0
def get_urls(url):
    """Gather every playable URL set for a Sina video page.

    Three sources are tried in order and all hits are kept: the news video
    id derived from ``url``, the iPad video id found in the page, and the
    segment video ids found in the page (labelled with the matching entry
    of the module-level ``rate`` sequence). The collected entries are
    returned through ``parser2dic``.
    """
    collected = []

    newsvid = get_newsvid(url)
    if newsvid is not None:
        newsurl = get_news_url_by_vid(newsvid)
        if newsurl is not None:
            collected.append({"rate": "标清", "furls": [newsurl]})

    page = get_content(url)

    ipadvid = get_ipadvid(page)
    if ipadvid is not None:
        ipadurl = get_urls_by_vid(ipadvid)
        if ipadurl is not None:
            collected.append({"rate": "标清", "furls": ipadurl})

    segvids = get_segvids(page)
    if segvids is not None:
        for position in range(len(segvids)):
            segment_urls = get_urls_by_vid(segvids[position])
            if segment_urls is not None:
                collected.append({"rate": rate[position], "furls": segment_urls})

    return parser2dic(collected)
Exemple #51
0
def get_cntv_urls_by_id(pid):
    """Collect the stream URLs of a CNTV programme id.

    A non-empty ``hls_url`` is listed first. Each of the ``chapters2``,
    ``lowChapters`` and ``chapters`` keys present in the reply's ``video``
    object then contributes one entry with the ``url`` of all its segments.
    """
    address = 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + pid
    info = json.loads(get_content(address))

    urls = []
    if info['hls_url'] != '':
        urls.append({'rate': '标清', 'furls': [info['hls_url']]})

    chapter_keys = ["chapters2", "lowChapters", "chapters"]
    video = info['video']
    for key in video.keys():
        if key in chapter_keys:
            entry = {'rate': get_clarity(key)}
            entry['furls'] = [segment['url'] for segment in video[key]]
            urls.append(entry)
    return urls
Exemple #52
0
def get_urls(url):
    """Resolve ``url`` through flvcd.com for three formats.

    For each of the first three entries of the module-level ``form``
    sequence, the flvcd parse page is fetched. A page listing several links
    yields them all; otherwise a single link is looked for. Each hit is
    labelled with the matching entry of the module-level ``rate`` sequence,
    and the collected entries are returned through ``parser2dic``.
    """
    urls = []
    for i in range(0,3):
        flvcdurl = "http://www.flvcd.com/parse.php?format={}&kw={}".format(form[i], quote(url))
        content = get_content(flvcdurl)
        furls = r2('<BR><a href=\"(.*?)\" target=',content)
        if furls!= []:
            # print(x) with a single argument behaves the same on Python 2
            # and 3.
            print('flvcd multi')
            urls.append({'rate': rate[i], 'furls': furls})
        else:
            print('flvcd single')
            sfurls = r1('<br>.*?<a href=\"(.*?)\" target',content)
            # Formatting into one string prints the same text on both.
            print('sfurls %s' % (sfurls,))
            if sfurls is not None:
                urls.append({'rate': rate[i], 'furls': [sfurls]})

    return parser2dic(urls)
Exemple #53
0
def get_iqiyi_urls(url):
    """Start one ``getUrls`` worker per stream of an iQiyi page.

    The page's ``data-player-tvid`` and ``data-player-videoid`` attributes
    identify the video; its VMS description lists the streams
    (``data.vp.tkl[0].vs``). A worker is started for each stream with its
    segment links, the generated uid, the description and the stream's
    ``bid`` as text. Errors while reading the description or starting
    workers are printed.

    NOTE(review): as visible here the function neither waits for the
    workers nor returns anything -- confirm against the callers.

    Raises:
        AssertionError: the page lacks either attribute.
    """
    threads = []
    gen_uid = uuid4().hex
    html = get_content(url)
    tvid = r1(r'data-player-tvid="([^"]+)"', html)
    videoid = r1(r'data-player-videoid="([^"]+)"', html)
    assert tvid
    assert videoid
    info = getVMS(tvid,videoid,gen_uid)
    try:
        # Read every stream before starting any worker, so a malformed
        # description starts none of them.
        streams = [(int(stream["bid"]), stream["fs"])
                   for stream in info["data"]["vp"]["tkl"][0]["vs"]]
        for i, (bid, video_links) in enumerate(streams):
            worker = getUrls(i, [video_links, gen_uid, info, str(bid)])
            threads.append(worker)
            worker.start()
    except Exception as e:
        print(e)
Exemple #54
0
def get_timestamp():
    """Return the ``stime`` value reported by the Letv time API.

    A random ``tn`` value is sent with the request.
    """
    address = 'http://api.letv.com/time?tn={}'.format(random.random())
    reply = json.loads(get_content(address))
    return reply['stime']
Exemple #55
0
def getVMS(tvid,vid,uid):
    """Fetch and decode the iQiyi VMS description of a video.

    The request carries a random ``tm`` in [1000, 2000], an ``enc`` value
    that is the MD5 hex digest of ``'ts56gh' + tm + tvid``, and a random
    ``tn``.

    Returns:
        The JSON-decoded reply of the ``cache.video.qiyi.com/vms`` request.
    """
    tm = str(randint(1000,2000))
    # hashlib needs bytes: ``bytes(text)`` without an encoding raises
    # TypeError on Python 3, so encode explicitly.
    digest = hashlib.md5(('ts56gh' + tm + tvid).encode('utf-8')).hexdigest()
    pieces = [
        'http://cache.video.qiyi.com/vms?key=fvip&src=p',
        "&tvId=", tvid,
        "&vid=", vid,
        "&vinfo=1&tm=", tm,
        "&enc=", digest,
        "&qyid=", uid,
        "&tn=", str(random()),
    ]
    return json.loads(get_content(''.join(pieces)))
Exemple #56
0
def getDispathKey(rid):
    """Return the dispatch key for resource id ``rid``.

    The key is the MD5 hex digest of the server time (``t`` field of the
    ``data.video.qiyi.com/t`` reply) in ten-minute units, a fixed salt and
    ``rid``, concatenated in that order.
    """
    tp=")(*&^flash@#$%a"  #magic from swf
    # Named ``server_time`` so the ``time`` module name is not hidden.
    server_time = json.loads(get_content("http://data.video.qiyi.com/t?tn="+str(random())))["t"]
    t = str(int(floor(int(server_time)/(10*60.0))))
    # hashlib needs bytes: ``bytes(text)`` without an encoding raises
    # TypeError on Python 3, so encode explicitly.
    return hashlib.md5((t+tp+rid).encode('utf-8')).hexdigest()
Exemple #57
0
def get_suffix_by_html(html):
    """Build the ``/url/<playUrl>/key/<playKey>`` suffix for a page.

    ``html`` is handed to ``get_content``; the ``_playUrl`` and ``_playKey``
    assignments of the returned text provide the two values.
    """
    page = get_content(html)
    play_url = r1(r"_playUrl = \'(.*?)\',", page)
    play_key = r1(r"_playKey = \'(.*?)\',", page)
    return "/url/" + play_url + "/key/" + play_key
Exemple #58
0
 def get_texts(self):
     """Yield ``common.get_content(line)`` for each non-blank input line.

     Lines are read lazily from the file named by ``self.input``; lines that
     are empty after stripping whitespace are skipped.
     """
     with open(self.input) as handle:
         for line in handle:
             if line.strip():
                 yield common.get_content(line)
Exemple #59
0
def get_gcid(html):
    """Return the gcid embedded in a page's ``pubnet.sandai.net`` mp4 link.

    ``html`` is handed to ``get_content``; the gcid is the path component
    that follows the numeric one in the first matching link.
    """
    page = get_content(html)
    return r1(r'http://pubnet.sandai.net:8080/\d+/(.*?)/.*?.mp4',page)