Example #1
def get_video_links_from_model_page(av_str):
    # assumes: import codecs; from bs4 import BeautifulSoup; import download as dl
    from_path = "http://upornia.com/models/" + av_str + "/"
    to_path = "model/" + av_str
    dl.download_url(from_path, to_path)
    f = codecs.open(to_path, "r", "utf-8")
    if f is None:
        return
    # print("processing file: " + str(av_str))
    content = f.read()
    soup = BeautifulSoup(content, "html.parser")
    video_list_section = soup.find('div', {"id": "list_videos2_common_videos_list"})
    if video_list_section is None:
        # print("video_list_section is empty")
        f.close()
        return
    # find_all returns a list, so check for emptiness rather than None
    items = video_list_section.find_all('article', {"class": "item"})
    if not items:
        # print("items are empty")
        f.close()
        return
    for item in items:
        url = item.find('a')
        if url is None:
            # print("url is empty")
            continue
        link = url['href']
        video_id = item['data-video-id']
        # print(link)
        dl_link_path = "video_url/" + video_id
        dl.download_url(link, dl_link_path)

    f.close()
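All of the examples on this page call a project-local download_url helper whose definition is not shown, and its signature varies by project (some versions return the local path, others take a session object, extra metadata, or a rate limit). Purely for orientation, a minimal sketch of the simplest form, assuming it just streams the response body to a file:

# Hypothetical sketch only; the real helpers used by these projects are not shown here.
import os
import urllib.request

def download_url(from_path, to_path):
    """Fetch from_path over HTTP and write the raw bytes to to_path."""
    dirname = os.path.dirname(to_path)
    if dirname:
        os.makedirs(dirname, exist_ok=True)
    with urllib.request.urlopen(from_path) as resp, open(to_path, "wb") as out:
        out.write(resp.read())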
Example #2
def dl_videos(link):
    # climber, get_ep_num, get_final_video_link and dl are defined elsewhere in the project
    content = climber.get_content(link)
    if content is None:
        return
    soup = BeautifulSoup(content, "html.parser")
    # find_all returns a list, so check for emptiness rather than None
    boxes = soup.find_all('div', {'class': 'boxim'})
    if not boxes:
        return

    links = []
    product_ids = []
    for sec in boxes:
        a_link_sec = sec.find('a')
        if a_link_sec is None:
            continue
        a_link = a_link_sec['href']
        a_title = a_link_sec['title']
        product_id = a_title.split(' ')[3]
        print("[{}] {}".format(product_id, a_link))
        links.append(a_link)
        product_ids.append(product_id)
    for idx, each_link in enumerate(links):
        content1 = climber.get_content(each_link)
        if content1 is None:
            continue
        soup1 = BeautifulSoup(content1, "html.parser")
        ep_num = get_ep_num(soup1)

        vid_link = get_final_video_link(each_link)
        if not vid_link:
            continue
        # download this vid_link to product_id_1.mp4
        first_file_path = "{}_1.mp4".format(product_ids[idx])
        print("Download {} ...".format(first_file_path))
        if os.path.exists(first_file_path):
            print("{} exists, skip".format(first_file_path))
        else:
            dl.download_url(vid_link, first_file_path)

        # print(vid_link)
        # episodes 2..ep_num live behind ?ep=N query strings
        for i in range(2, ep_num + 1):
            new_link = "{}?ep={}".format(each_link, i)
            vid_link1 = get_final_video_link(new_link)
            if not vid_link1:
                continue
            # print(vid_link1)
            to_path = "{}_{}.mp4".format(product_ids[idx], i)
            print("Download {} ...".format(to_path))
            if os.path.exists(to_path):
                print("{} exists, skip".format(to_path))
            else:
                dl.download_url(vid_link1, to_path)
Example #3
def getPhoto(soup):
    # av_ID is assumed to be defined in the enclosing scope
    photo_section = soup.find('div', {"class": "photo"})
    if photo_section is not None:
        tn_section = photo_section.find('p', {"class": "tn"})
        if tn_section is not None:
            # May have no photo
            if tn_section.find('a') is None:
                return None
            url = tn_section.a.img['src'].split('?width')[0]
            if 'noimage.gif' in url:
                return None
            download_path = "../video_img/" + str(av_ID) + ".jpg"
            dl.download_url(url, download_path)
Example #4
def scan_jpg(path):
    if not os.path.exists(path):
        print('Path does not exist: {}'.format(path))
        return
    for entry in scandir.scandir(path):
        if entry.is_dir(follow_symlinks=False):
            yield from scan_jpg(entry.path)
        else:
            # print(entry.path)
            if ".jpg" not in entry.path:
                file_without_ext = os.path.splitext(entry.path)[0]
                jpg_file = "{}.jpg".format(file_without_ext)
                if not os.path.exists(jpg_file):
                    bn = ntpath.basename(file_without_ext)
                    # print("checking {}".format(bn))
                    match = re.match(r'^([A-Z0-9]+-[A-Z]*\d+)\D*', bn)
                    if match is None:
                        print("{} does not match the regexp, skip".format(bn))
                        continue
                    product_id = ""
                    for gg in match.groups():
                        product_id = gg
                        # print("Check {}".format(product_id))

                    # print("{} doesn't exist, downloading...".format(product_id))
                    hyper_link_path = 'https://www.javbus.com/{}'.format(product_id)
                    content = ""
                    try:
                        content = urllib.request.urlopen(hyper_link_path).read().decode('utf-8')
                    # except urllib.error.HTTPError as e:
                    except Exception:
                        continue  # skip this entry if the page cannot be fetched

                    soup = BeautifulSoup(content, "html.parser")
                    link = soup.find('a', {'class': 'bigImage'})
                    if link is None:
                        print('Cannot find jpg link from "{}", abort...'.format(product_id))
                        debug = soup.find('div', {'class': 'col-md-9'})
                        if debug is None:
                            continue
                        print(debug)
                        continue
                    link_path = link['href']
                    to_path = '/Volumes/wd2/new_cover/{}.jpg'.format(product_id)
                    if os.path.exists(to_path):
                        continue
                    print("Download '{}' -> '{}'".format(link_path, to_path))
                    dl.download_url(link_path, to_path)
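The regular expression above pulls a product ID such as ABCD-123 off the front of a file name. A quick illustration with hypothetical file names:

import re

pattern = re.compile(r'^([A-Z0-9]+-[A-Z]*\d+)\D*')
for name in ("ABCD-123 some title", "XYZ-00456.part1", "readme"):
    m = pattern.match(name)
    print(name, "->", m.group(1) if m else None)
# ABCD-123 some title -> ABCD-123
# XYZ-00456.part1 -> XYZ-00456
# readme -> None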
Example #5
File: wat.py Project: mfajet/WAT
def download_output_wrapper(URL_num, URL, destpath, videos, i, rate_limit):
    """
    Wrapper around download_url that prints which URL is being downloaded and when it finishes.
    """
    print("Downloading URL #{}: {}\n".format(URL_num, URL))
    sys.stdout.flush()
    data = download_url(URL, destpath, videos, i, rate_limit)
    print("Finished for URL #{}: {}\n".format(URL_num, URL))
    sys.stdout.flush()
    return data
Example #6
        if result is None:
            continue

        # download image
        frame = soup.find('div', {"class": "frame"})
        if frame is not None:
            photo = frame.find('div', {"class": "photo"})
            if photo is not None:
                imgTag = photo.find('img')
                if imgTag is not None:
                    imgsrc = imgTag['src']
                    noImage = re.search('noimage', imgsrc)
                    if noImage is None:
                        print(imgsrc)
                        img_path = "../image/" + str(file)
                        dl.download_url(imgsrc, img_path)
                    else:
                        print('no image...')
        # download itemBox (disabled)
        # prefix = "http://xcity.jp"
        # items = soup.find_all('div', {"class": "x-itemBox-package"})
        # for item in items:
        #     url = item.find('a')
        #     if url is None:
        #         continue
        #     url = prefix + url['href']
        #     print(url)
        #     download_path = "../video/" + url.split('?id=')[1]
        #     dl.download_url(url, download_path)

        # update result to DB
Example #7
        "--filename",
        dest="filename",
        required=False,
        type=str,
        help="sets custom name for output file (name without extension)"
    )

    parser.add_argument(
        "-o",
        "--output",
        dest="output",
        required=False,
        type=str,
        help="specify output directory (defaults to /home/Music)"
    )


if __name__ == "__main__":
    parser = ArgumentParser(
        description="Script for downloading music from youtube videos.\n"
    )

    add_arguments(parser)

    arguments = parser.parse_args()
    if url := arguments.url:
        interval = arguments.interval
        fname = arguments.filename
        output = arguments.output
        download_url(url, interval, fname, output)
        
Example #8
from bs4 import BeautifulSoup
import download as dl
import os.path

# download every release detail page, skipping files that already exist
for fileId in range(1, 62490 + 1):
    if os.path.isfile("video/" + str(fileId)):
        continue
    url = "http://xcity.jp/release/detail/?id=%d" % fileId
    print("download: " + url)

    dl.download_url(url, "video/" + str(fileId))

Example #9
def get_page(page_num):
    from_path = "http://www.javbus.com/ja/actresses/%d" % page_num
    to_path = "actress_page/%d" % page_num
    dl.download_url(from_path, to_path)
Example #10
File: 23.py Project: toddpod/Ti_Collector
# encoding=utf8
# author: LiMengming
# date: 2017-10-19
# domain type
# source = https://ransomwaretracker.abuse.ch/downloads/RW_URLBL.txt
# source_id = 23

import download

stamp = 'BlackList'
source = '23'
url = 'https://ransomwaretracker.abuse.ch/downloads/RW_URLBL.txt'
download.download_url(source, stamp, url)
Example #11
# encoding=utf8
# source: https://openphish.com/feed.txt
# source ID: 20
# date: 2017-9-22

import download

stamp = 'Phish'
source = '20'
url = 'https://openphish.com/feed.txt'
download.download_url(source, stamp, url)
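Note the argument order in Examples #10 and #11: this project's download.download_url takes (source, stamp, url) rather than (url, path). Its definition isn't shown; purely as a guess at its shape, assuming it fetches the feed and names the output file from the source ID and stamp:

# Hypothetical sketch; the actual Ti_Collector implementation is not shown here.
import urllib.request

def download_url(source, stamp, url):
    """Fetch a threat feed and save it under a name derived from source and stamp."""
    data = urllib.request.urlopen(url).read()
    with open("{}_{}.txt".format(source, stamp), "wb") as out:
        out.write(data)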
Example #12
def generate_contours_for_url(url, bucket, bucket_prefix):
    # Check if s3 files already exist
    s3_path_metric = get_s3_path(url, bucket_prefix, metric=True)
    s3_path_imperial = get_s3_path(url, bucket_prefix, metric=False)
    if s3_key_exists(bucket, key=s3_path_metric) and s3_key_exists(
            bucket, key=s3_path_imperial):
        print('s3 path exists; skipping')
        return None

    with TemporaryDirectory() as tmpdir:
        # Download url to local path
        print(url)
        local_path = download_url(url, tmpdir)

        # Unzip DEM
        with open(local_path, 'rb') as f:
            with ZipFile(f) as zf:
                img_file = [x for x in zf.namelist() if x.endswith('.img')][0]
                unzipped_path = zf.extract(img_file, path=tmpdir)

        # Generate metric contours
        # outputs hardcoded to data/contours_10m
        print('generating metric contours')
        cmd = ['bash', 'make_contours_10m.sh', unzipped_path]
        run(cmd, check=True)

        if bucket is not None:
            gj_path = Path('data/contour_10m') / (Path(unzipped_path).stem +
                                                  '.geojson')
            assert gj_path.exists(), 'file does not exist'
            print('generating metric mbtiles')
            mbtiles_path = run_tippecanoe(gj_path, metric=True)

            # Write mbtiles to S3
            s3.Bucket(bucket).upload_file(
                str(mbtiles_path), f'{bucket_prefix}/10m/{mbtiles_path.name}')

            # Delete geojson and mbtiles
            Path(gj_path).unlink(missing_ok=True)
            Path(mbtiles_path).unlink(missing_ok=True)

        # Generate imperial contours
        # outputs hardcoded to data/contours_40ft
        print('generating imperial contours')
        cmd = ['bash', 'make_contours_40ft.sh', unzipped_path]
        run(cmd, check=True)

        if bucket is not None:
            gj_path = Path('data/contour_40ft') / (Path(unzipped_path).stem +
                                                   '.geojson')
            assert gj_path.exists(), 'file does not exist'
            print('generating imperial mbtiles')
            mbtiles_path = run_tippecanoe(gj_path, metric=False)

            # Write mbtiles to S3
            s3.Bucket(bucket).upload_file(
                str(mbtiles_path), f'{bucket_prefix}/40ft/{mbtiles_path.name}')

            # Delete geojson and mbtiles
            Path(gj_path).unlink(missing_ok=True)
            Path(mbtiles_path).unlink(missing_ok=True)
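generate_contours_for_url leans on an s3_key_exists helper that isn't shown above. A minimal sketch of one common way to write it with boto3 (hypothetical; the project's actual helper may differ), using a HEAD request so the object body is never fetched:

# Hypothetical helper; the project's real s3_key_exists is not shown on this page.
import boto3
import botocore

s3 = boto3.resource('s3')

def s3_key_exists(bucket, key):
    """Return True if s3://bucket/key exists, False on a 404, re-raise otherwise."""
    try:
        s3.Object(bucket, key).load()  # HEAD request
        return True
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == '404':
            return False
        raise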
Example #13
    if f is None:
        continue
    print("processing file: " + str(file))
    content = f.read()

    soup = BeautifulSoup(content, "html.parser")
    waterfall = soup.find('div', {"id": "waterfall"})
    if waterfall is None:
        f.close()
        continue
    # find_all returns a list, so check for emptiness rather than None
    items = waterfall.find_all('a', {"class": "avatar-box"})
    if not items:
        f.close()
        continue
    for item in items:
        img_tag = item.find('img')
        if img_tag is None:
            continue
        name = img_tag['title']
        img_url = img_tag['src']
        print(name)
        url = item['href']
        dl.download_url(url, "../actress/" + name)
        if "nowprinting.gif" in img_url:
            print('No image, skip')
        else:
            dl.download_url(img_url, "../av_icon/" + name + ".jpg")
    os.rename(file, "../proceed_page/" + str(file))
    f.close()
Example #14
    cur.executescript('''
        DROP TABLE IF EXISTS Products;

        CREATE TABLE Products (
            id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
            filename TEXT UNIQUE,
            path TEXT,
            md5sum TEXT UNIQUE
            );
        ''')

    current_path = Path(__file__).parent.absolute()
    for i in range(len(href)):
        print("Downloading file")
        zip_file = download_url(s, href[i]['href'], str(current_path) + '/')
        path = str(current_path) + zip_file
        print(path)
        print("Complete downloading file: ", zip_file)
        prod_link = href[i]['href']
        prod_id = prod_link[:-6]
        prod_checksum = prod_id + "Checksum/Value/$value"
        print("Getting file checksum")
        checksum = s.get(prod_checksum)
        downloaded_file_checksum = md5(path)

        if checksum.text == downloaded_file_checksum:
            print("Checksums match")
            cur.execute(
                '''INSERT OR IGNORE INTO Products (filename, path, md5sum)
                VALUES ( ? , ?, ? )''',
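The md5(path) call above refers to a helper that isn't shown; a minimal sketch, assuming it hashes the downloaded file in chunks and returns the hex digest:

# Hypothetical sketch; the project's actual md5 helper is not shown here.
import hashlib

def md5(path, chunk_size=8192):
    """Return the hex MD5 digest of a file, read in chunks to bound memory use."""
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()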
Example #15
def get_music(req):
    try:
        filename = download_url(BASE_URL + '/get', os.path.expanduser(MUSIC_STORAGE), get_params=req.filename)
        return GetMusicResponse(success=True, filename=filename)
    except Exception as e:
        return GetMusicResponse(success=False, message=str(e))