def download_file_parallel(url, target_path, show_progress=False, num_threads=1):
    """Download the file from the given `url` and store it at `target_path`.

    Return a tuple x (url, bool, str). x[0] contains the url. If download
    failed x[1] is ``False`` and x[2] contains some error message. If
    download was fine x[1] is ``True`` and x[2] contains the target-path.

    NOTE(review): as written this function always returns ``True``; the
    failure shape described above is never produced here — confirm whether
    errors surface as exceptions from the Downloader instead.
    """
    downloader = Downloader(url, target_path, num_threads)
    downloader.start()

    pbar = None
    if show_progress:
        # Wait until the downloader has learned the total file size.
        # Sleep briefly instead of busy-spinning so we do not pin a CPU core.
        while downloader.total_length == 0:
            time.sleep(0.05)

        pbar = tqdm(total=downloader.total_length, desc='Download File',
                    unit_scale=True)

        def update_pbar(x):
            # The callback reports cumulative bytes; feed tqdm the delta.
            pbar.update(x.total_downloaded - pbar.n)

        downloader.subscribe(update_pbar, 10)

    try:
        downloader.wait_for_finish()
    finally:
        # Close the bar even if the wait raises, so the terminal is restored.
        if pbar is not None:
            pbar.close()

    return (url, True, target_path)
def download_file_parallel(url, target_path, num_threads=1):
    """Download the file from the given `url` and store it at `target_path`.

    Return a tuple x (url, bool, str). x[0] contains the url. If download
    failed x[1] is ``False`` and x[2] contains some error message. If
    download was fine x[1] is ``True`` and x[2] contains the target-path.

    NOTE(review): a second definition of ``download_file_parallel`` — it
    shadows the earlier progress-bar variant at module load; confirm which
    one is intended to win.
    """
    downloader = Downloader(url, target_path, num_threads)
    downloader.start()

    # Wait until we know the file size; sleep briefly instead of
    # busy-spinning so the wait does not pin a CPU core.
    while downloader.total_length == 0:
        time.sleep(0.05)

    file_size = downloader.total_length
    logger.info('Download file from "%s" with size: %d B', url, file_size)

    bytes_at_last_log = 0

    def callback(x):
        # Throttle progress logging: only emit a line after at least
        # PROGRESS_LOGGER_BYTE_DELAY new bytes have arrived.
        nonlocal bytes_at_last_log
        if x.total_downloaded - bytes_at_last_log >= PROGRESS_LOGGER_BYTE_DELAY:
            logger.info('Download [%06.2f%%]',
                        x.total_downloaded / file_size * 100)
            bytes_at_last_log = x.total_downloaded

    downloader.subscribe(callback, 10)
    downloader.wait_for_finish()
    logger.info('Finished download')
    return (url, True, target_path)
def d(self, item, vpath, jpath, spider):
    """Download the item's video to `vpath`, then write its metadata JSON to
    `jpath` and record the item as processed.

    Returns early (writing nothing) when the downloaded file fails
    ``self.check_video``, i.e. when no duration could be determined.
    """
    # pget's Downloader: /usr/local/lib/python3.6/site-packages/pget
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
        'Referer': 'https://www.google.com/'
    }
    video_dl = Downloader(item['url_video'], vpath, chunk_count=0,
                          high_speed=True, headers=request_headers,
                          proxies=spider.proxies)
    video_dl.start()
    video_dl.wait_for_finish()

    duration = self.check_video(vpath)
    if not duration:
        return

    with open(jpath, 'w+') as meta_file:
        timestamp = int(time.time())
        # Signature: md5 of a fixed salt formatted with the current epoch.
        digest = hashlib.md5()
        digest.update("sc%7*g{}@!$%".format(timestamp).encode(encoding='utf-8'))
        payload = {
            'time': timestamp,
            'sig': digest.hexdigest(),
            'name': item['videoName'],
            'area': 'us',
            'cate': 'Beauty',
            'year': 2019,
            'director': '',
            'actor': '',
            'type': 'movie',
            'total': 1,
            'cover_url': item['name'] + '.jpg',
            'grade': 2.0,
            'mins': duration,
            'source_url': item['name'] + '.mp4',
            'resolution': item['resolution'],
            'part': 1,
            'intro': ''
        }
        json.dump(payload, meta_file)

    spider.all_data.append(item['name'])
    write_data.open_data('dataList', item['name'] + '\n')
def download_file(url, target_path):
    """Download `url` to `target_path` with 8 threads, showing a tqdm bar."""
    downloader = Downloader(url, target_path, 8)
    downloader.start()

    # Wait until the total size is known; sleep briefly instead of
    # busy-spinning so the wait does not pin a CPU core.
    while downloader.total_length == 0:
        time.sleep(0.05)

    pbar = tqdm(total=downloader.total_length, desc='Download File',
                unit_scale=True)

    def update_pbar(x):
        # The callback reports cumulative bytes; feed tqdm the delta.
        pbar.update(x.total_downloaded - pbar.n)

    downloader.subscribe(update_pbar, 10)
    try:
        downloader.wait_for_finish()
    finally:
        # Restore the terminal even if the download raises.
        pbar.close()
def readExtractedfiles():
    """Walk the extracted codes CSVs under DEFAULT_DATA_PATH and, for the
    first five code rows of each file, fetch dataset metadata from Quandl
    and download the dataset's data.csv into a per-database/per-dataset
    folder tree below `rootfolder`.

    Errors on one codes file are logged and skipped so the remaining files
    are still processed. (The previous ``except: raise`` / outer
    ``except: pass`` pair silently aborted the whole run on the first
    error, and the ``continue`` after ``raise`` was unreachable.)
    """
    folderconvey = getCodesInCSVsForAllDatasets(quandl_apikey)

    q_data_base_URL = "https://www.quandl.com/api/v3/datasets/{0}"

    filenamesList = []
    for (dirpath, dirnames, filenames) in walk(DEFAULT_DATA_PATH):
        filenamesList.extend(filenames)

    for fn in filenamesList:
        print(fn)
        try:
            logging.info(fn + " extracted.")
            codesFile = os.path.abspath(os.path.join(DEFAULT_DATA_PATH, fn))
            with open(codesFile, 'r') as csv_file:
                csvlines = csv_file.readlines()

            # Only the first five rows of each codes file are processed.
            for num, line in enumerate(csvlines[:5]):
                codeline = line.split(',')
                if len(codeline) <= 1:
                    continue
                dataset_code = codeline[0]
                download_url = q_data_base_URL.format(dataset_code)
                data_URL = download_url + "?api_key=" + quandl_apikey
                time.sleep(1)  # crude rate-limit against the Quandl API

                # NOTE(review): building a shell command by concatenation is
                # injection-prone; acceptable only because dataset codes come
                # from Quandl's own CSVs — consider urllib/requests instead.
                resp = os.popen("curl " + data_URL)
                json_data = json.loads(resp.read())

                foldername = json_data["dataset"]["name"]
                dat_code = json_data["dataset"]["database_code"]
                # Sanitize to snake_case: strip non-alphanumerics, collapse
                # runs of spaces, lower-case.
                foldername = re.sub("[^A-Za-z0-9 ]+", "", foldername)
                foldername = re.sub(" +", " ", foldername).replace(
                    " ", "_").lower()
                print(">>>>>>>" + foldername)

                # Dict lookup replaces the previous linear scan over the keys.
                if dat_code not in folderconvey:
                    continue
                out_fldr_name = re.sub("[^A-Za-z0-9 ]+", "",
                                       folderconvey[dat_code])
                out_fldr_name = re.sub(" +", " ", out_fldr_name).replace(
                    " ", "_").lower()

                try:
                    os.chdir(rootfolder)
                    if not os.path.isdir(out_fldr_name):
                        os.mkdir(out_fldr_name)
                    os.chdir(out_fldr_name)
                    if not os.path.isdir(foldername):
                        os.mkdir(foldername)
                    os.chdir(foldername)
                except OSError:
                    # Was WindowsError (a NameError off Windows); OSError
                    # covers it on every platform.
                    continue

                fileformat = ".csv"
                code_part = dataset_code.split('/')[1]
                # NOTE(review): existence is tested against
                # '<code>-datasets-codes.csv' but the download is saved as
                # '<code>.csv' — looks inconsistent; preserved as-is.
                if not os.path.isfile(code_part + '-datasets-codes'
                                      + fileformat):
                    urll = download_url + "/data.csv"
                    downloader = Downloader(urll, code_part + fileformat, 8)
                    downloader.start()
                    downloader.wait_for_finish()
        except Exception:
            logging.exception("Failed processing %s; skipping", fn)
            continue
def LLloop():
    """Process one work item from the LLprogress file, then sleep 1 s.

    Each item is a JSON list ``[url, subdir, chunk_count, dl_type]``.
    dl_type 1 downloads with pget's Downloader, dl_type 2 shells out to
    curl. The result line is appended to cgi-bin/LLresult when the target
    file exists afterwards, else to cgi-bin/LLerrors. When LLprogress is
    (effectively) empty, the next queued item is promoted from LLqueue.

    Fixes over the previous version: every file handle is closed via
    ``with``; the builtin name ``type`` is no longer shadowed; the queue
    rotation now reads the remainder of LLqueue *before* reopening it with
    mode "w" (the old code truncated the file while copyfileobj was still
    reading from the same handle, losing the queued items).
    """
    base = os.path.dirname(__file__)

    with open(base + '/cgi-bin/LLprogress', "r+", encoding='utf-8') as progress:
        item = progress.read().split('\n', 1)[0]

    if len(item) > 10:
        LL_item = json.loads(item)
        url = LL_item[0]
        filename = 'files' + LL_item[1] + '/' + os.path.basename(LL_item[0])
        chunk = int(LL_item[2])
        dl_type = int(LL_item[3])
        print('Start Downloading [' + url + ']')

        if dl_type == 1:
            downloader = Downloader(url, filename, chunk)
            downloader.start()
            downloader.wait_for_finish()
        if dl_type == 2:
            # NOTE(review): URL concatenated into a shell command is
            # injection-prone if the queue file is writable by others.
            LLcurl = 'curl -k ' + url + ' --output ' + filename
            os.system(LLcurl)

        datastring = '["' + url + '"' + ',' + '"' + filename + '"' + ',' + \
            '"' + LL_item[2] + '"' + ',' + '"' + LL_item[3] + '"' + "]\n"

        # Success goes to LLresult, a missing output file to LLerrors.
        target = ('/cgi-bin/LLresult' if os.path.isfile(filename)
                  else '/cgi-bin/LLerrors')
        with open(base + target, "a", encoding='utf-8') as out:
            out.write(datastring)
        print('Checking for next ...')
    else:
        print('Waiting mode ...')
        with open(base + '/cgi-bin/LLqueue', "r+", encoding='utf-8') as queue:
            new_item = queue.read().split('\n', 1)[0]

        if len(new_item) > 10:
            # Promote the first queued item into LLprogress ...
            with open(base + '/cgi-bin/LLprogress', "w",
                      encoding='utf-8') as prog:
                prog.write(new_item + "\n")
            # ... and drop it from LLqueue: read the remainder first, then
            # rewrite the file (reading after truncation loses the data).
            with open(base + '/cgi-bin/LLqueue', "r+",
                      encoding='utf-8') as old_queue:
                old_queue.readline()
                remainder = old_queue.read()
            with open(base + '/cgi-bin/LLqueue', "w",
                      encoding='utf-8') as new_queue:
                new_queue.write(remainder)
        else:
            with open(base + '/cgi-bin/LLprogress', "w",
                      encoding='utf-8') as prog:
                prog.write('' + "\n")
            print('No new Item !')
    time.sleep(1)
# Fragment: relies on `name`, `original_url`, and `rootfolder` defined in the
# surrounding (not visible) scope — presumably inside a loop over datasets.
fileformat = ".csv"
# Sanitize the dataset name into a snake_case filename with a .csv suffix.
filename = re.sub("[^A-Za-z0-9 ]", "", name).lower().replace(
    " ", "_") + fileformat
# Only handle direct .csv URLs here (the zip path below is disabled).
if original_url.split(".")[-1] == 'csv':
    os.chdir(rootfolder)
    # NOTE(review): a directory is created with the same name as the file
    # (including the ".csv" suffix), we chdir into it, then test for a file
    # of the identical name inside it — looks suspicious; confirm intent.
    if not os.path.isdir(filename):
        os.mkdir(filename)
    os.chdir(filename)
    if not os.path.isfile(filename):
        # 8-threaded parallel download via pget's Downloader.
        downloader = Downloader(original_url, filename, 8)
        downloader.start()
        print "came here"
        print "downloading file " + filename
        downloader.wait_for_finish()
# Disabled zip-archive handling kept as a string literal (dead code).
'''if original_url.split(".")[-1] == 'zip':
    #url = urllib.urlopen(original_url)
    #zip_file = ZipFile(StringIO(url.read()))
    #files = zipfile.namelist()
    #fopen = open(filename+'.csv', 'w')
    #zipcontent = url.read()
    downloader = Downloader(original_url, filename+".zip", 8)
    downloader.start()
    downloader.wait_for_finish()
    print "comitted here"
    with zipfile.ZipFile(filename+".zip", "r") as zfr:
        zfr.extractall(filename)
    os.chdir(rootfolder)
    os.remove(filename+".zip")'''
def extractFromJSON(domain, datasets_colln):
    """For every dataset document in `datasets_colln`, pick a resource
    format and download each resource of that format into a folder named
    after the dataset under `root_folder`.

    Format selection keeps the original semantics: the first resource
    (in document order) whose format string contains any preferred token
    decides, using this priority within that resource:
    JSONL, jsonl, CSV, csv, JSON, json. Datasets with no match are skipped.

    Fixes: removed the unreachable ``res_format is None`` branch inside the
    download loop (we ``continue`` earlier when it is None) and narrowed the
    bare ``except`` so KeyboardInterrupt still aborts.
    """
    preferred = ('JSONL', 'jsonl', 'CSV', 'csv', 'JSON', 'json')

    datasets_cursor = datasets_colln.find()
    print(datasets_cursor)
    for dataset in datasets_cursor:
        dataset_name = dataset["name"]
        print(">>>> " + dataset_name)

        # Snapshot each resource's format/url/id.
        available_formats = {}
        for i, res in enumerate(dataset["resources"]):
            available_formats[i] = {
                "format": res["format"],
                "url": res["url"],
                "filename": res["id"]
            }

        # First resource matching any preferred token decides the format.
        res_format = None
        for a in available_formats.values():
            res_format = next(
                (p for p in preferred if p in a["format"]), None)
            if res_format is not None:
                break
        if res_format is None:
            continue

        # Collect every resource whose format matches the chosen one.
        res_urls = {}
        for a in available_formats.values():
            if res_format in a["format"]:
                res_urls[a["url"]] = a["filename"]

        os.chdir(root_folder)
        if not os.path.isdir(dataset_name):
            os.mkdir(dataset_name)
        os.chdir(dataset_name)

        print(str(res_format) + " :: " + str(len(res_urls)))
        for res_url, base_name in res_urls.items():
            file_name = base_name + "." + res_format.lower()
            print("Downloading... " + file_name)
            try:
                if not os.path.isfile(file_name):
                    sleep(1)  # crude rate-limit between downloads
                    downloader = Downloader(res_url, file_name, 8)
                    downloader.start()
                    downloader.wait_for_finish()
            except Exception:
                print("Error @ " + dataset_name)
                continue