Example #1
def download_and_extract_from_fallback_url(fallback_url,
                                           filename,
                                           relative_src_dir,
                                           download_dir,
                                           extract_dir,
                                           sha1_hash_expected=None):
    p = urlparse(fallback_url)
    new_path = p[2] + "/" + relative_src_dir + "/" + filename
    fallback_download_url = urlunparse(
        [p[0], p[1], new_path, p[3], p[4], p[5]])

    download_filename = eos.util.download_file(
        fallback_download_url,
        download_dir,
        sha1_hash_expected=sha1_hash_expected)
    if download_filename == "":
        eos.log_error("downloading of file from fallback URL " +
                      fallback_download_url + " failed")
        return False

    if os.path.exists(extract_dir):
        shutil.rmtree(extract_dir)

    if not eos.archive.extract_file(download_filename, extract_dir):
        eos.log_error("extraction of file " + download_filename +
                      " from fallback URL " + fallback_download_url +
                      " failed")
        return False

    return True
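For reference, the URL-joining pattern used above can be reduced to a few lines: split the base URL with urlparse, append path segments, and reassemble with urlunparse. A minimal sketch, with a made-up base URL and file name and without the download/extract steps:

from urllib.parse import urlparse, urlunparse

def build_fallback_url(fallback_url, relative_src_dir, filename):
    # Split the base URL into its six components.
    p = urlparse(fallback_url)
    # Append the relative directory and file name to the existing path.
    new_path = p.path + "/" + relative_src_dir + "/" + filename
    # Reassemble, keeping scheme, netloc, params, query and fragment unchanged.
    return urlunparse((p.scheme, p.netloc, new_path, p.params, p.query, p.fragment))

# build_fallback_url("https://example.com/mirror", "archives", "lib-1.0.tar.gz")
# -> 'https://example.com/mirror/archives/lib-1.0.tar.gz'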
Example #2
    def get_parser(self, lwa):
        # https://stackoverflow.com/questions/15799696/library-to-build-urls-in-python
        clwa = list(urlparse(lwa))
        for cid, cname in enumerate(self.parsers):
            if cname.__name__.lower() in clwa[1]:
                clwa[2] = ''
                return (cid, self.parsers[cid], urlunparse(clwa))
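Clearing the path element (clwa[2] = '') before calling urlunparse reduces the URL to its scheme and netloc. A small standalone illustration of that effect; the sample URL is arbitrary:

from urllib.parse import urlparse, urlunparse

parts = list(urlparse("https://docs.python.org/3/library/urllib.parse.html"))
parts[2] = ''              # drop the path, keep scheme and netloc
print(urlunparse(parts))   # https://docs.python.org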
Example #3
def query_joiner(text=None):
	components = OrderedDict([('language', 'en'), ('text', text)])
	query_items = list(components.items())
	query = '&'.join(['='.join(field) for field in query_items])
	terms = OrderedDict(
		[('scheme', 'http'), ('netloc', 'conceptnet5.media.mit.edu'), ('path', '/data/5.4/uri'), ('params', ''),
		 ('query', query), ('fragment', '')])
	return urlunparse(terms.values())
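urlunparse only needs an iterable of six components, which is why passing terms.values() works here. A sketch of the same request URL built with urlencode instead of manual '&'/'=' joining; unlike the original it also percent-encodes the values, and it assumes text is not None (the default of None would make '='.join fail above):

from collections import OrderedDict
from urllib.parse import urlencode, urlunparse

def query_joiner_sketch(text):
    query = urlencode(OrderedDict([('language', 'en'), ('text', text)]))
    return urlunparse(('http', 'conceptnet5.media.mit.edu', '/data/5.4/uri', '', query, ''))

# query_joiner_sketch('example phrase')
# -> 'http://conceptnet5.media.mit.edu/data/5.4/uri?language=en&text=example+phrase'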
Example #4
def downloadFile(url,
                 download_dir,
                 target_dir_name,
                 sha1_hash=None,
                 force_download=False,
                 user_agent=DEFAULT_USER_AGENT):
    if not os.path.isdir(download_dir):
        os.mkdir(download_dir)

    p = urlparse(url)
    url = urlunparse([p[0], p[1], quote(p[2]), p[3], p[4],
                      p[5]])  # replace special characters in the URL path

    filename_rel = os.path.split(p.path)[1]  # get original filename
    target_filename = os.path.join(download_dir, filename_rel)

    # check SHA1 hash, if file already exists
    if os.path.exists(
            target_filename) and sha1_hash is not None and sha1_hash != "":
        hash_file = computeFileHash(target_filename)
        if hash_file != sha1_hash:
            log("Hash of " + target_filename + " (" + hash_file +
                ") does not match expected hash (" + sha1_hash +
                "); forcing download")
            force_download = True

    # download file
    if (not os.path.exists(target_filename)) or force_download:
        log("Downloading " + url + " to " + target_filename)
        if p.scheme == "ssh":
            downloadSCP(p.hostname, p.username, p.path, download_dir)
        else:
            if user_agent is not None:
                opener = urllib.request.build_opener()
                opener.addheaders = [('User-agent', user_agent)]
                f = open(target_filename, 'wb')
                f.write(opener.open(url).read())
                f.close()
            else:
                urlretrieve(url, target_filename)
    else:
        log("Skipping download of " + url + "; already downloaded")

    # check SHA1 hash
    if sha1_hash is not None and sha1_hash != "":
        hash_file = computeFileHash(target_filename)
        if hash_file != sha1_hash:
            raise RuntimeError("Hash of " + target_filename + " (" +
                               hash_file + ") differs from expected hash (" +
                               sha1_hash + ")")

    return target_filename
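The quote(p[2]) step above only percent-encodes the path component; scheme, host, query and fragment pass through untouched. A quick standalone check of that behaviour, using an invented URL:

from urllib.parse import urlparse, urlunparse, quote

url = "http://example.com/files/release notes (v1).txt"
p = urlparse(url)
safe_url = urlunparse([p[0], p[1], quote(p[2]), p[3], p[4], p[5]])
print(safe_url)  # http://example.com/files/release%20notes%20%28v1%29.txt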
def main():
    host="www.google.co.kr"
    
    conn = http.client.HTTPConnection(host)
    conn.request("GET", '')
    resp = conn.getresponse()
    data = resp.read().decode(resp.headers.get_content_charset())
    conn.close()
    
    url = urlunparse(('http',host,'','','',''))
    print(url)
    
    downloadImage(url,data)
def main():
    host = "www.google.com"

    conn = client.HTTPConnection(host)
    conn.request("GET", '')
    resp = conn.getresponse()

    charset = resp.headers.get_content_charset()
    data = resp.read().decode(charset)
    conn.close()

    print("\n>>>>>>>>> Download Images from", host)
    url = request.urlunparse(('http', host, '', '', '', ''))
    downloadImage(url, data)
Example #7
def parse_images(url, page, out_folder):
    soup = bs(page, "html.parser")
    [x.extract() for x in soup.findAll('script')]
    parsed = list(urlparse(url))

    for image in soup.findAll("img"):
        print("Image: %(src)s" % image)
        filename = image["src"].split("/")[-1]
        parsed[2] = image["src"]
        outpath = os.path.join(out_folder, "images", filename)
        if image["src"].lower().startswith("http"):
            urlretrieve(image["src"], outpath)
        else:
            urlretrieve(urlunparse(parsed), outpath)
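Substituting image["src"] into parsed[2] treats the src as a path from the site root; urljoin, by contrast, resolves it relative to the page URL, which is often what is wanted for relative srcs. A short sketch with invented page and image paths:

from urllib.parse import urljoin

page_url = "http://example.com/gallery/index.html"
print(urljoin(page_url, "thumbs/cat.png"))    # http://example.com/gallery/thumbs/cat.png
print(urljoin(page_url, "/static/logo.png"))  # http://example.com/static/logo.png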
Example #8
def main(url, out_folder="/test/"):
    """Downloads all the images at 'url' to /test/"""
    soup = bs(urlopen(url), features='html.parser')
    parsed = list(urlparse(url))

    for image in soup.findAll("img"):
        print("Image: %(src)s" % image)
        filename = image["src"].split("/")[-1]
        parsed[2] = image["src"]
        outpath = os.path.join(out_folder, filename)
        if image["src"].lower().startswith("http"):
            urlretrieve(image["src"], outpath)
        else:
            urlretrieve(urlunparse(parsed), outpath)
Example #9
    def construct_url(query, max_position=None):
        """
        For a given query, will construct a URL to search Twitter with
        :param query: The query term used to search twitter
        :param max_position: The max_position value to select the next pagination of tweets
        :return: A string URL
        """

        params = {
            # Type Param
            'f': 'tweets',
            # Query Param
            'q': query
        }

        # If our max_position param is not None, we add it to the parameters
        if max_position is not None:
            params['max_position'] = max_position

        url_tuple = ('https', 'twitter.com', '/i/search/timeline',
                     '', urlencode(params), '')
        return urlunparse(url_tuple)
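A hedged usage sketch of the construct_url logic above, inlined so it runs standalone; the query and max_position values are made up:

from urllib.parse import urlencode, urlunparse

params = {'f': 'tweets', 'q': 'python urlunparse'}
params['max_position'] = 'TWEET-123-456'  # optional pagination cursor (fictional value)
url = urlunparse(('https', 'twitter.com', '/i/search/timeline', '', urlencode(params), ''))
print(url)
# https://twitter.com/i/search/timeline?f=tweets&q=python+urlunparse&max_position=TWEET-123-456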
def downloadFile(url, download_dir, target_dir_name, sha1_hash = None, force_download = False, user_agent = None):
    if not os.path.isdir(download_dir):
        os.mkdir(download_dir)

    p = urlparse(url)
    url = urlunparse([p[0], p[1], quote(p[2]), p[3], p[4], p[5]]) # replace special characters in the URL path

    filename_rel = os.path.split(p.path)[1] # get original filename
    target_filename = os.path.join(download_dir, filename_rel)

    # check SHA1 hash, if file already exists
    if os.path.exists(target_filename) and sha1_hash is not None and sha1_hash != "":
        hash_file = computeFileHash(target_filename)
        if hash_file != sha1_hash:
            log("Hash of " + target_filename + " (" + hash_file + ") does not match expected hash (" + sha1_hash + "); forcing download")
            force_download = True

    # download file
    if (not os.path.exists(target_filename)) or force_download:
        log("Downloading " + url + " to " + target_filename)
        if p.scheme == "ssh":
            downloadSCP(p.hostname, p.username, p.path, download_dir)
        else:
            if user_agent is not None:
                MyURLOpener.version = user_agent
                MyURLOpener().retrieve(url, target_filename)
            else:
                urlretrieve(url, target_filename)
    else:
        log("Skipping download of " + url + "; already downloaded")

    # check SHA1 hash
    if sha1_hash is not None and sha1_hash != "":
        hash_file = computeFileHash(target_filename)
        if hash_file != sha1_hash:
            raise RuntimeError("Hash of " + target_filename + " (" + hash_file + ") differs from expected hash (" + sha1_hash + ")")

    return target_filename
Example #11
def sanitize_url(url):
    p = urlparse(url)
    url = urlunparse([p[0], p[1], quote(p[2]), p[3], p[4],
                      p[5]])  # quote special characters in the path
    return url
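A quick demonstration of sanitize_url on a path containing a space and a non-ASCII character; the input URL is invented, and only the path component is altered:

from urllib.parse import urlparse, urlunparse, quote

def sanitize_url(url):
    p = urlparse(url)
    return urlunparse([p[0], p[1], quote(p[2]), p[3], p[4], p[5]])

print(sanitize_url("https://example.org/docs/Übersicht 2024.pdf?v=1"))
# https://example.org/docs/%C3%9Cbersicht%202024.pdf?v=1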
def main(argv):
    global BASE_DIR, SRC_DIR, ARCHIVE_DIR, DEBUG_OUTPUT, FALLBACK_URL, USE_TAR, USE_UNZIP
    global TOOL_COMMAND_PYTHON, TOOL_COMMAND_GIT, TOOL_COMMAND_HG, TOOL_COMMAND_SVN, TOOL_COMMAND_PATCH, TOOL_COMMAND_TAR, TOOL_COMMAND_UNZIP

    try:
        opts, args = getopt.getopt(
            argv,
            "ln:N:cCb:h",
            ["list", "name=", "name-file=", "clean", "clean-all", "base-dir", "bootstrap-file=", "local-bootstrap-file=", "use-tar", "use-unzip", "repo-snapshots", "fallback-url=", "force-fallback", "debug-output", "help"])
    except getopt.GetoptError:
        printOptions()
        return 0

    opt_names = []
    name_files = []
    opt_clean = False
    opt_clean_archives = False
    list_libraries = False

    default_bootstrap_filename = "bootstrap.json"
    bootstrap_filename = os.path.abspath(os.path.join(BASE_DIR, default_bootstrap_filename))
    local_bootstrap_filename = ""
    create_repo_snapshots = False
    force_fallback = False

    base_dir_path = ""

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            printOptions()
            return 0
        if opt in ("-l", "--list"):
            list_libraries = True
        if opt in ("-n", "--name"):
            opt_names.append(arg)
        if opt in ("-N", "--name-file"):
            name_files.append(os.path.abspath(arg))
        if opt in ("-c", "--clean"):
            opt_clean = True
        if opt in ("-C", "--clean-all"):
            opt_clean = True
            opt_clean_archives = True
        if opt in ("-b", "--base-dir"):
            base_dir_path = os.path.abspath(arg)
            BASE_DIR = base_dir_path
            SRC_DIR = os.path.join(BASE_DIR, SRC_DIR_BASE)
            ARCHIVE_DIR = os.path.join(BASE_DIR, ARCHIVE_DIR_BASE)
            bootstrap_filename = os.path.join(BASE_DIR, default_bootstrap_filename)
            log("Using " + arg + " as base directory")
        if opt in ("--bootstrap-file",):
            bootstrap_filename = os.path.abspath(arg)
            log("Using main bootstrap file " + bootstrap_filename)
        if opt in ("--local-bootstrap-file",):
            local_bootstrap_filename = os.path.abspath(arg)
            log("Using local bootstrap file " + local_bootstrap_filename)
        if opt in ("--use-tar",):
            USE_TAR = True
        if opt in ("--use-unzip",):
            USE_UNZIP = True
        if opt in ("--repo-snapshots",):
            create_repo_snapshots = True
            log("Will create repository snapshots")
        if opt in ("--fallback-url",):
            FALLBACK_URL = arg
        if opt in ("--force-fallback",):
            force_fallback = True
            log("Using fallback URL to fetch all libraries")
        if opt in ("--debug-output",):
            DEBUG_OUTPUT = True

    if platform.system() != "Windows":
        # Unfortunately some IDEs do not have a proper PATH environment variable set,
        # so we search manually for the required tools in some obvious locations.
        paths_to_search = os.environ["PATH"].split(":") + ["/usr/local/bin", "/opt/local/bin", "/usr/bin"]
        TOOL_COMMAND_PYTHON = findToolCommand(TOOL_COMMAND_PYTHON, paths_to_search, required = True)
        TOOL_COMMAND_GIT = findToolCommand(TOOL_COMMAND_GIT, paths_to_search, required = True)
        TOOL_COMMAND_HG = findToolCommand(TOOL_COMMAND_HG, paths_to_search, required = True)
        TOOL_COMMAND_SVN = findToolCommand(TOOL_COMMAND_SVN, paths_to_search, required = True)
        TOOL_COMMAND_PATCH = findToolCommand(TOOL_COMMAND_PATCH, paths_to_search, required = True)
        TOOL_COMMAND_TAR = findToolCommand(TOOL_COMMAND_TAR, paths_to_search, required = USE_TAR)
        TOOL_COMMAND_UNZIP = findToolCommand(TOOL_COMMAND_UNZIP, paths_to_search, required = USE_UNZIP)

    if base_dir_path:
        os.chdir(base_dir_path)

    if name_files:
        for name_file in name_files:
            try:
                with open(name_file) as f:
                    opt_names_local = [l for l in (line.strip() for line in f) if l]
                    opt_names_local = [l for l in opt_names_local if l[0] != '#']
                    opt_names += opt_names_local
                    dlog("Name file contains: " + ", ".join(opt_names_local))
            except:
                log("ERROR: cannot parse name file " + name_file)
                return -1

    if force_fallback and not FALLBACK_URL:
        log("Error: cannot force usage of the fallback location without specifying a fallback URL")
        return -1

    state_filename = os.path.join(os.path.dirname(os.path.splitext(bootstrap_filename)[0]), \
                                  "." + os.path.basename(os.path.splitext(bootstrap_filename)[0])) \
                     + os.path.splitext(bootstrap_filename)[1]

    dlog("bootstrap_filename = " + bootstrap_filename)
    dlog("state_filename     = " + state_filename)

    # read canonical libraries data
    data = readJSONData(bootstrap_filename)
    if data is None:
        return -1

    # some sanity checking
    for library in data:
        if library.get('name', None) is None:
            log("ERROR: Invalid schema: library object does not have a 'name'")
            return -1

    # read local libraries data, if available
    local_data = None
    if local_bootstrap_filename:
        local_data = readJSONData(local_bootstrap_filename)

        if local_data is None:
            return -1

        # some sanity checking
        for local_library in local_data:
            if local_library.get('name', None) is None:
                log("ERROR: Invalid schema: local library object does not have a 'name'")
                return -1

    # merge canonical and local library data, if applicable; local libraries take precedence
    if local_data is not None:
        for local_library in local_data:
            local_name = local_library.get('name', None)
            found_canonical_library = False
            for n, library in enumerate(data):
                name = library.get('name', None)
                if local_name == name:
                    data[n] = local_library # overwrite library
                    found_canonical_library = True
            if not found_canonical_library:
                data.append(local_library)

    if list_libraries:
        listLibraries(data)
        return 0

    sdata = []
    if os.path.exists(state_filename):
        sdata = readJSONData(state_filename)

    # create source directory
    if not os.path.isdir(SRC_DIR):
        log("Creating directory " + SRC_DIR)
        os.mkdir(SRC_DIR)

    # create archive files directory
    if not os.path.isdir(ARCHIVE_DIR):
        log("Creating directory " + ARCHIVE_DIR)
        os.mkdir(ARCHIVE_DIR)

    failed_libraries = []

    for library in data:
        name = library.get('name', None)
        source = library.get('source', None)
        post = library.get('postprocess', None)

        if (opt_names) and (not name in opt_names):
            continue

        lib_dir = os.path.join(SRC_DIR, name)

        dlog("********** LIBRARY " + name + " **********")
        dlog("lib_dir = " + lib_dir + ")")

        # compare against cached state
        cached_state_ok = False
        if not opt_clean:
            for slibrary in sdata:
                sname = slibrary.get('name', None)
                if sname is not None and sname == name and slibrary == library and os.path.exists(lib_dir):
                    cached_state_ok = True
                    break

        if cached_state_ok:
            log("Cached state for " + name + " equals expected state; skipping library")
            continue
        else:
            # remove cached state for library
            sdata[:] = [s for s in sdata if not (lambda s, name : s.get('name', None) is not None and s['name'] == name)(s, name)]

        # create library directory, if necessary
        if opt_clean:
            log("Cleaning directory for " + name)
            if os.path.exists(lib_dir):
                shutil.rmtree(lib_dir)
        if not os.path.exists(lib_dir):
            os.mkdir(lib_dir)

        try:
            # download source
            if source is not None:
                if 'type' not in source:
                    log("ERROR: Invalid schema for " + name + ": 'source' object must have a 'type'")
                    return -1
                if 'url' not in source:
                    log("ERROR: Invalid schema for " + name + ": 'source' object must have a 'url'")
                    return -1
                src_type = source['type']
                src_url = source['url']

                if src_type == "sourcefile":
                    sha1 = source.get('sha1', None)
                    user_agent = source.get('user-agent', None)
                    try:
                        if force_fallback:
                            raise RuntimeError
                        downloadFile(src_url, ARCHIVE_DIR, name, sha1, force_download = opt_clean_archives, user_agent = user_agent)
                        filename_rel = os.path.basename(src_url)
                        shutil.copyfile( os.path.join(ARCHIVE_DIR, filename_rel), os.path.join(lib_dir, filename_rel) )
                    except:
                        if FALLBACK_URL:
                            if not force_fallback:
                                log("WARNING: Downloading of file " + src_url + " failed; trying fallback")

                            p = urlparse(src_url)
                            filename_rel = os.path.split(p.path)[1] # get original filename
                            p = urlparse(FALLBACK_URL)
                            fallback_src_url = urlunparse([p[0], p[1], p[2] + "/" + ARCHIVE_DIR_BASE + "/" + filename_rel, p[3], p[4], p[5]])
                            downloadFile(fallback_src_url, ARCHIVE_DIR, name, sha1, force_download = True)
                            shutil.copyfile( os.path.join(ARCHIVE_DIR, filename_rel), os.path.join(lib_dir, filename_rel) )
                        else:
                            shutil.rmtree(lib_dir)
                            raise
                elif src_type == "archive":
                    sha1 = source.get('sha1', None)
                    user_agent = source.get('user-agent', None)
                    try:
                        if force_fallback:
                            raise RuntimeError
                        downloadAndExtractFile(src_url, ARCHIVE_DIR, name, sha1, force_download = opt_clean_archives, user_agent = user_agent)
                    except:
                        if FALLBACK_URL:
                            if not force_fallback:
                                log("WARNING: Downloading of file " + src_url + " failed; trying fallback")

                            p = urlparse(src_url)
                            filename_rel = os.path.split(p.path)[1] # get original filename
                            p = urlparse(FALLBACK_URL)
                            fallback_src_url = urlunparse([p[0], p[1], p[2] + "/" + ARCHIVE_DIR_BASE + "/" + filename_rel, p[3], p[4], p[5]])
                            downloadAndExtractFile(fallback_src_url, ARCHIVE_DIR, name, sha1, force_download = True)
                        else:
                            raise

                else:
                    revision = source.get('revision', None)

                    archive_name = name + ".tar.gz" # for reading or writing of snapshot archives
                    if revision is not None:
                        archive_name = name + "_" + revision + ".tar.gz"

                    try:
                        if force_fallback:
                            raise RuntimeError
                        cloneRepository(src_type, src_url, name, revision)

                        if create_repo_snapshots:
                            log("Creating snapshot of library repository " + name)
                            repo_dir = os.path.join(SRC_DIR, name)
                            archive_filename = os.path.join(SNAPSHOT_DIR, archive_name)

                            dlog("Snapshot will be saved as " + archive_filename)
                            createArchiveFromDirectory(repo_dir, archive_filename, revision is None)

                    except:
                        if FALLBACK_URL:
                            if not force_fallback:
                                log("WARNING: Cloning of repository " + src_url + " failed; trying fallback")

                            # copy archived snapshot from fallback location
                            p = urlparse(FALLBACK_URL)
                            fallback_src_url = urlunparse([p[0], p[1], p[2] + "/" + SNAPSHOT_DIR_BASE + "/" + archive_name, p[3], p[4], p[5]])
                            dlog("Looking for snapshot " + fallback_src_url + " of library repository " + name)

                            # create snapshots files directory
                            downloadAndExtractFile(fallback_src_url, SNAPSHOT_DIR, name, force_download = True)

                            # reset repository state to particular revision (only using local operations inside the function)
                            cloneRepository(src_type, src_url, name, revision, True)
                        else:
                            raise
            else:
                # set up clean directory for potential patch application
                shutil.rmtree(lib_dir)
                os.mkdir(lib_dir)

            # post-processing
            if post is not None:
                if 'type' not in post:
                    log("ERROR: Invalid schema for " + name + ": 'postprocess' object must have a 'type'")
                    return -1
                if 'file' not in post:
                    log("ERROR: Invalid schema for " + name + ": 'postprocess' object must have a 'file'")
                    return -1
                post_type = post['type']
                post_file = post['file']

                if post_type == "patch":
                    applyPatchFile(post_file, name, post.get('pnum', DEFAULT_PNUM))
                elif post_type == "script":
                    runPythonScript(post_file)
                else:
                    log("ERROR: Unknown post-processing type '" + post_type + "' for " + name)
                    return -1

            # add to cached state
            sdata.append(library)

            # write out cached state
            writeJSONData(sdata, state_filename)
        except:
            log("ERROR: Failure to bootstrap library " + name + " (reason: " + str(sys.exc_info()[0]) + ")")
            traceback.print_exc()
            failed_libraries.append(name)

    if failed_libraries:
        log("***************************************")
        log("FAILURE to bootstrap the following libraries:")
        log(', '.join(failed_libraries))
        log("***************************************")
        return -1

    log("Finished")

    return 0
def main(argv):
    global BASE_DIR, SRC_DIR, ARCHIVE_DIR, DEBUG_OUTPUT, FALLBACK_URL, USE_TAR, USE_UNZIP
    global TOOL_COMMAND_PYTHON, TOOL_COMMAND_GIT, TOOL_COMMAND_HG, TOOL_COMMAND_SVN, TOOL_COMMAND_PATCH, TOOL_COMMAND_TAR, TOOL_COMMAND_UNZIP

    try:
        opts, args = getopt.getopt(argv, "ln:N:cCb:h", [
            "list", "name=", "name-file=", "skip=", "clean", "clean-all",
            "base-dir", "bootstrap-file=", "local-bootstrap-file=", "use-tar",
            "use-unzip", "repo-snapshots", "fallback-url=", "force-fallback",
            "debug-output", "help", "break-on-first-error"
        ])
    except getopt.GetoptError:
        printOptions()
        return 0

    opt_names = []
    name_files = []
    skip_libs = []
    opt_clean = False
    opt_clean_archives = False
    list_libraries = False

    default_bootstrap_filename = "bootstrap.json"
    bootstrap_filename = os.path.abspath(
        os.path.join(BASE_DIR, default_bootstrap_filename))
    local_bootstrap_filename = ""
    create_repo_snapshots = False
    force_fallback = False
    break_on_first_error = False

    base_dir_path = ""

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            printOptions()
            return 0
        if opt in ("-l", "--list"):
            list_libraries = True
        if opt in ("-n", "--name"):
            opt_names.append(arg)
        if opt in ("-N", "--name-file"):
            name_files.append(os.path.abspath(arg))
        if opt in ("--skip", ):
            skip_libs.append(arg)
        if opt in ("-c", "--clean"):
            opt_clean = True
        if opt in ("-C", "--clean-all"):
            opt_clean = True
            opt_clean_archives = True
        if opt in ("-b", "--base-dir"):
            base_dir_path = os.path.abspath(arg)
            BASE_DIR = base_dir_path
            SRC_DIR = os.path.join(BASE_DIR, SRC_DIR_BASE)
            ARCHIVE_DIR = os.path.join(BASE_DIR, ARCHIVE_DIR_BASE)
            bootstrap_filename = os.path.join(BASE_DIR,
                                              default_bootstrap_filename)
            log("Using " + arg + " as base directory")
        if opt in ("--bootstrap-file", ):
            bootstrap_filename = os.path.abspath(arg)
            log("Using main bootstrap file " + bootstrap_filename)
        if opt in ("--local-bootstrap-file", ):
            local_bootstrap_filename = os.path.abspath(arg)
            log("Using local bootstrap file " + local_bootstrap_filename)
        if opt in ("--use-tar", ):
            USE_TAR = True
        if opt in ("--use-unzip", ):
            USE_UNZIP = True
        if opt in ("--repo-snapshots", ):
            create_repo_snapshots = True
            log("Will create repository snapshots")
        if opt in ("--fallback-url", ):
            FALLBACK_URL = arg
        if opt in ("--force-fallback", ):
            force_fallback = True
            log("Using fallback URL to fetch all libraries")
        if opt in ("--break-on-first-error", ):
            break_on_first_error = True
        if opt in ("--debug-output", ):
            DEBUG_OUTPUT = True

    if platform.system() != "Windows":
        # Unfortunately some IDEs do not have a proper PATH environment variable set,
        # so we search manually for the required tools in some obvious locations.
        paths_to_search = os.environ["PATH"].split(":") + [
            "/usr/local/bin", "/opt/local/bin", "/usr/bin"
        ]
        TOOL_COMMAND_PYTHON = findToolCommand(TOOL_COMMAND_PYTHON,
                                              paths_to_search,
                                              required=True)
        TOOL_COMMAND_GIT = findToolCommand(TOOL_COMMAND_GIT,
                                           paths_to_search,
                                           required=True)
        TOOL_COMMAND_HG = findToolCommand(TOOL_COMMAND_HG,
                                          paths_to_search,
                                          required=True)
        TOOL_COMMAND_SVN = findToolCommand(TOOL_COMMAND_SVN,
                                           paths_to_search,
                                           required=True)
        TOOL_COMMAND_PATCH = findToolCommand(TOOL_COMMAND_PATCH,
                                             paths_to_search,
                                             required=True)
        TOOL_COMMAND_TAR = findToolCommand(TOOL_COMMAND_TAR,
                                           paths_to_search,
                                           required=USE_TAR)
        TOOL_COMMAND_UNZIP = findToolCommand(TOOL_COMMAND_UNZIP,
                                             paths_to_search,
                                             required=USE_UNZIP)

    if base_dir_path:
        os.chdir(base_dir_path)

    if name_files:
        for name_file in name_files:
            try:
                with open(name_file) as f:
                    opt_names_local = [
                        l for l in (line.strip() for line in f) if l
                    ]
                    opt_names_local = [
                        l for l in opt_names_local if l[0] != '#'
                    ]
                    opt_names += opt_names_local
                    dlog("Name file contains: " + ", ".join(opt_names_local))
            except:
                log("ERROR: cannot parse name file " + name_file)
                return -1

    if force_fallback and not FALLBACK_URL:
        log("Error: cannot force usage of the fallback location without specifying a fallback URL"
            )
        return -1

    state_filename = os.path.join(os.path.dirname(os.path.splitext(bootstrap_filename)[0]), \
                                  "." + os.path.basename(os.path.splitext(bootstrap_filename)[0])) \
                     + os.path.splitext(bootstrap_filename)[1]

    dlog("bootstrap_filename = " + bootstrap_filename)
    dlog("state_filename     = " + state_filename)

    # read canonical libraries data
    data = readJSONData(bootstrap_filename)
    if data is None:
        return -1

    # some sanity checking
    for library in data:
        if library.get('name', None) is None:
            log("ERROR: Invalid schema: library object does not have a 'name'")
            return -1

    # read local libraries data, if available
    local_data = None
    if local_bootstrap_filename:
        local_data = readJSONData(local_bootstrap_filename)

        if local_data is None:
            return -1

        # some sanity checking
        for local_library in local_data:
            if local_library.get('name', None) is None:
                log("ERROR: Invalid schema: local library object does not have a 'name'"
                    )
                return -1

    # merge canonical and local library data, if applicable; local libraries take precedence
    if local_data is not None:
        for local_library in local_data:
            local_name = local_library.get('name', None)
            found_canonical_library = False
            for n, library in enumerate(data):
                name = library.get('name', None)
                if local_name == name:
                    data[n] = local_library  # overwrite library
                    found_canonical_library = True
            if not found_canonical_library:
                data.append(local_library)

    if list_libraries:
        listLibraries(data)
        return 0

    sdata = []
    if os.path.exists(state_filename):
        sdata = readJSONData(state_filename)

    # create source directory
    if not os.path.isdir(SRC_DIR):
        log("Creating directory " + SRC_DIR)
        os.mkdir(SRC_DIR)

    # create archive files directory
    if not os.path.isdir(ARCHIVE_DIR):
        log("Creating directory " + ARCHIVE_DIR)
        os.mkdir(ARCHIVE_DIR)

    failed_libraries = []

    for library in data:
        name = library.get('name', None)
        source = library.get('source', None)
        post = library.get('postprocess', None)

        if (skip_libs) and (name in skip_libs):
            continue

        if (opt_names) and (not name in opt_names):
            continue

        lib_dir = os.path.join(SRC_DIR, name)
        lib_dir = lib_dir.replace(os.path.sep, '/')

        dlog("********** LIBRARY " + name + " **********")
        dlog("lib_dir = " + lib_dir + ")")

        # compare against cached state
        cached_state_ok = False
        if not opt_clean:
            for slibrary in sdata:
                sname = slibrary.get('name', None)
                if sname is not None and sname == name and slibrary == library and os.path.exists(
                        lib_dir):
                    cached_state_ok = True
                    break

        if cached_state_ok:
            log("Cached state for " + name +
                " equals expected state; skipping library")
            continue
        else:
            # remove cached state for library
            sdata[:] = [
                s for s in sdata if not (lambda s, name: s.get(
                    'name', None) is not None and s['name'] == name)(s, name)
            ]

        # create library directory, if necessary
        if opt_clean:
            log("Cleaning directory for " + name)
            if os.path.exists(lib_dir):
                shutil.rmtree(lib_dir)
        if not os.path.exists(lib_dir):
            os.makedirs(lib_dir)

        try:
            # download source
            if source is not None:
                if 'type' not in source:
                    log("ERROR: Invalid schema for " + name +
                        ": 'source' object must have a 'type'")
                    return -1
                if 'url' not in source:
                    log("ERROR: Invalid schema for " + name +
                        ": 'source' object must have a 'url'")
                    return -1
                src_type = source['type']
                src_url = source['url']

                if src_type == "sourcefile":
                    sha1 = source.get('sha1', None)
                    user_agent = source.get('user-agent', None)
                    try:
                        if force_fallback:
                            raise RuntimeError
                        downloadFile(src_url,
                                     ARCHIVE_DIR,
                                     name,
                                     sha1,
                                     force_download=opt_clean_archives,
                                     user_agent=user_agent)
                        filename_rel = os.path.basename(src_url)
                        shutil.copyfile(
                            os.path.join(ARCHIVE_DIR, filename_rel),
                            os.path.join(lib_dir, filename_rel))
                    except:
                        if FALLBACK_URL:
                            if not force_fallback:
                                log("WARNING: Downloading of file " + src_url +
                                    " failed; trying fallback")

                            p = urlparse(src_url)
                            filename_rel = os.path.split(
                                p.path)[1]  # get original filename
                            p = urlparse(FALLBACK_URL)
                            fallback_src_url = urlunparse([
                                p[0], p[1], p[2] + "/" + ARCHIVE_DIR_BASE +
                                "/" + filename_rel, p[3], p[4], p[5]
                            ])
                            downloadFile(fallback_src_url,
                                         ARCHIVE_DIR,
                                         name,
                                         sha1,
                                         force_download=True)
                            shutil.copyfile(
                                os.path.join(ARCHIVE_DIR, filename_rel),
                                os.path.join(lib_dir, filename_rel))
                        else:
                            shutil.rmtree(lib_dir)
                            raise
                elif src_type == "archive":
                    sha1 = source.get('sha1', None)
                    user_agent = source.get('user-agent', None)
                    try:
                        if force_fallback:
                            raise RuntimeError
                        downloadAndExtractFile(
                            src_url,
                            ARCHIVE_DIR,
                            name,
                            sha1,
                            force_download=opt_clean_archives,
                            user_agent=user_agent)
                    except:
                        if FALLBACK_URL:
                            if not force_fallback:
                                log("WARNING: Downloading of file " + src_url +
                                    " failed; trying fallback")

                            p = urlparse(src_url)
                            filename_rel = os.path.split(
                                p.path)[1]  # get original filename
                            p = urlparse(FALLBACK_URL)
                            fallback_src_url = urlunparse([
                                p[0], p[1], p[2] + "/" + ARCHIVE_DIR_BASE +
                                "/" + filename_rel, p[3], p[4], p[5]
                            ])
                            downloadAndExtractFile(fallback_src_url,
                                                   ARCHIVE_DIR,
                                                   name,
                                                   sha1,
                                                   force_download=True)
                        else:
                            raise

                else:
                    revision = source.get('revision', None)

                    archive_name = name + ".tar.gz"  # for reading or writing of snapshot archives
                    if revision is not None:
                        archive_name = name + "_" + revision + ".tar.gz"

                    try:
                        if force_fallback:
                            raise RuntimeError
                        cloneRepository(src_type, src_url, name, revision)

                        if create_repo_snapshots:
                            log("Creating snapshot of library repository " +
                                name)
                            repo_dir = os.path.join(SRC_DIR, name)
                            archive_filename = os.path.join(
                                SNAPSHOT_DIR, archive_name)

                            dlog("Snapshot will be saved as " +
                                 archive_filename)
                            createArchiveFromDirectory(repo_dir,
                                                       archive_filename,
                                                       revision is None)

                    except:
                        if FALLBACK_URL:
                            if not force_fallback:
                                log("WARNING: Cloning of repository " +
                                    src_url + " failed; trying fallback")

                            # copy archived snapshot from fallback location
                            p = urlparse(FALLBACK_URL)
                            fallback_src_url = urlunparse([
                                p[0], p[1], p[2] + "/" + SNAPSHOT_DIR_BASE +
                                "/" + archive_name, p[3], p[4], p[5]
                            ])
                            dlog("Looking for snapshot " + fallback_src_url +
                                 " of library repository " + name)

                            # create snapshots files directory
                            downloadAndExtractFile(fallback_src_url,
                                                   SNAPSHOT_DIR,
                                                   name,
                                                   force_download=True)

                            # reset repository state to particular revision (only using local operations inside the function)
                            cloneRepository(src_type, src_url, name, revision,
                                            True)
                        else:
                            raise
            else:
                # set up clean directory for potential patch application
                shutil.rmtree(lib_dir)
                os.mkdir(lib_dir)

            # post-processing
            if post is not None:
                if 'type' not in post:
                    log("ERROR: Invalid schema for " + name +
                        ": 'postprocess' object must have a 'type'")
                    return -1
                if 'file' not in post:
                    log("ERROR: Invalid schema for " + name +
                        ": 'postprocess' object must have a 'file'")
                    return -1
                post_type = post['type']
                post_file = post['file']

                if post_type == "patch":
                    applyPatchFile(post_file, name,
                                   post.get('pnum', DEFAULT_PNUM))
                elif post_type == "script":
                    runPythonScript(post_file)
                else:
                    log("ERROR: Unknown post-processing type '" + post_type +
                        "' for " + name)
                    return -1

            # add to cached state
            sdata.append(library)

            # write out cached state
            writeJSONData(sdata, state_filename)
        except:
            log("ERROR: Failure to bootstrap library " + name + " (reason: " +
                str(sys.exc_info()[0]) + ")")
            if break_on_first_error:
                exit(-1)
            traceback.print_exc()
            failed_libraries.append(name)

    if failed_libraries:
        log("***************************************")
        log("FAILURE to bootstrap the following libraries:")
        log(', '.join(failed_libraries))
        log("***************************************")
        return -1

    log("Finished")

    return 0
Example #14
    def get_epg_from_receiver(self, provider, url):
        # reduce the pids to the ones containing SDT (0x11) and EIT (0x12)
        url_st = urlparse(url)
        queries = url_st.query
        new_queries = ""
        if queries:
            for eq in queries.split("&"):
                key = eq.split("=")[0]
                value = eq.split("=")[1]
                if key == 'pids':
                    value = "0,17,18"
                new_queries += key + "=" + value + "&"
        new_queries = new_queries.strip("&")
        url_epd_pids_only = urlunparse((
            url_st.scheme,
            url_st.netloc,
            url_st.path,
            url_st.params,
            new_queries,
            url_st.fragment,
        ))

        attr = [
            os.path.join(self.origin_dir, 'epg_grap.sh'), url_epd_pids_only,
            provider,
            str(self.config.read('epgloops')),
            str(self.config.read('epgtimeout'))
        ]  # process arguments
        self.logger.info("epg_grap started {0} {1} {2}".format(
            provider, url_epd_pids_only, repr(attr)))
        try:
            self.process = subprocess.Popen(attr,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
            cleaner = Timer(
                600, self.cleanProcess
            )  # if epg_grap won't exit, try to terminate its process after 600 seconds
            cleaner.start()
            epg_out, err = self.process.communicate()
            #self.process.wait() # oops... not needed? harmless!
            cleaner.cancel()
            if err:
                self.logger.warning("epg_grap ended with an error:\n%s" %
                                    (err))
            else:
                self.logger.debug("epg_grap' ended")
                epg_json_string = epg_out.decode()
                epg_json = json.loads(epg_json_string)
                result = {}
                count = 0
                for json_movie in epg_json['details'].values():
                    start = json_movie['unixTimeBegin']
                    stop = json_movie['unixTimeEnd']
                    if json_movie['title']:
                        title = self.split_text_by_capital_chars(
                            json_movie['title'])[0]
                    else:
                        title = json_movie['name']
                    desc = '\n'.join(
                        self.split_text_by_capital_chars(
                            json_movie['description']))
                    category = json_movie['name']
                    count += 1

                    # we'll use the name of the stream source plugin instead the name of the EPG plugin itself
                    # plugin_name = self.plugin_names[0]
                    plugin_name = self.stream_source
                    self.providers.add(provider)
                    # EPG has its own special hardwired categories
                    #self.categories.add(category)
                    new_movie = MovieInfo(
                        url=url,
                        mime='video/MP2T',
                        title=title,
                        category=category,
                        source=plugin_name,
                        source_type=defaults.MOVIE_TYPE_STREAM,
                        provider=provider,
                        timestamp=int(start),
                        duration=stop - start,
                        description=desc)

                    if not plugin_name in self.movies:
                        self.movies[plugin_name] = {}
                    self.movies[plugin_name][new_movie['uri']] = new_movie
                    result[start] = new_movie
                for json_provider in epg_json['providers']:
                    self.logger.debug(
                        "channel found in epg: {0}".format(json_provider))
                self.logger.info("{0} epg loaded, {1} entries".format(
                    provider, count))
                return result
        except Exception as ex:
            self.logger.warning("epg_grap could not be started. Error: %s" %
                                (ex))
        return
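The query rewrite above can also be done with parse_qs and urlencode, which avoids the manual '&' splitting; a sketch under the assumption that repeated query keys are not needed (the receiver URL is invented, and safe=',' keeps the comma-separated pid list unescaped):

from urllib.parse import urlparse, urlunparse, parse_qs, urlencode

def restrict_pids(url):
    p = urlparse(url)
    q = parse_qs(p.query)
    if 'pids' in q:
        q['pids'] = ['0,17,18']  # keep only PAT (0), SDT (0x11) and EIT (0x12)
    return urlunparse((p.scheme, p.netloc, p.path, p.params,
                       urlencode(q, doseq=True, safe=','), p.fragment))

# restrict_pids("http://receiver.local:8001/stream?src=1&pids=0,100,101")
# -> 'http://receiver.local:8001/stream?src=1&pids=0,17,18'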
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        if newurl.startswith('training.phtml'):
            newurl = '/island/' + newurl
        elif not newurl.startswith('/'):
            newurl = '/' + newurl
            
        # fix relative URL
        newurl = 'http://www.neopets.com' + newurl

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if not urlparts.scheme in ('http', 'https', 'ftp'):
            raise HTTPError(newurl, code,
                            msg +
                            " - Redirection to url '%s' is not allowed" %
                            newurl,
                            headers, fp)

        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)
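The "missing path" repair near the end of the handler can be exercised on its own; a small sketch with a fictional redirect target:

from urllib.parse import urlparse, urlunparse

newurl = "http://www.neopets.com"  # redirect target without a path
urlparts = urlparse(newurl)
if not urlparts.path:
    urlparts = list(urlparts)
    urlparts[2] = "/"
print(urlunparse(urlparts))        # http://www.neopets.com/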