Example #1
def login(opener):
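    # POST the RHN username/password from the config to the Red Hat SSO legacy login flow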
    params = { "_flowId": "legacy-login-flow",
               "username": config["rhn-username"],
               "password": config["rhn-password"] }

    common.retrieve_m("https://www.redhat.com/wapps/sso/login.html",
                      urllib.urlencode(params), opener = opener)
Example #2
def login(opener):
    url = config["pt-root"] + "register/"
    html = common.retrieve_m(url, opener = opener, tries = 10).read()
    html = lxml.html.fromstring(html)

    params = {}
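    # Seed the POST data with every input field of the login form (hidden fields included)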
    for i in html.xpath("//form[@name = 'login']//input"):
        params[i.get("name")] = i.get("value")

    params["email"] = config["pt-username"]
    params["password"] = config["pt-password"]

    common.retrieve_m(url, urllib.urlencode(params), opener = opener, tries = 10)
Example #3
def getjnlpurl(url):
    f = common.retrieve_m(url)
    m = re.search("LaunchURL = \"(.*?)\"", f.read())
    f.close()

    t = urllib.splittype(url)
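    # t[0] is the URL scheme and splithost(t[1])[0] the host; the scraped LaunchURL is appended to them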
    return t[0] + "://" + urllib.splithost(t[1])[0] + m.group(1)
Example #4
def getjnlpurl(url):
    f = common.retrieve_m(url)
    m = re.search("LaunchURL = \"(.*?)\"", f.read().decode("utf-8"))
    f.close()

    t = urllib.parse.splittype(url)
    return t[0] + "://" + urllib.parse.splithost(t[1])[0] + m.group(1)
Example #5
def getjnlpurl(url):
    f = common.retrieve_m(url)
    m = re.search('LaunchURL = "(.*?)"', f.read().decode("utf-8"))
    f.close()

    t = urllib.parse.splittype(url)
    return t[0] + "://" + urllib.parse.splithost(t[1])[0] + m.group(1)
Example #6
def download_item_page(item, tries=1):
    try:
        html = common.retrieve_m(item.pageurl, tries=tries)
    except urllib.error.HTTPError as e:
        warn("can't load item page %s (#%u, %s, %s) (%s), continuing..." % \
                 (item.pageurl, item.number, item.title, item.type_, e))
        return

    xml = lxml.html.parse(html)

    try:
        if item.type_ == "Videos":
            item.set_dlurl(xml.xpath("//a[text()='OGG']/@href")[0])
            extension = ".ogg"
        else:
            item.set_dlurl(xml.xpath("//a[@class='ResourceFile']/@href")[0])
            extension = ".pdf"
    except IndexError:
        if item.type_ != "Other Resources":
            # "Other Resources" often contain a text article with no link
            warn(
                "can't find item download link at %s (#%u, %s, %s), continuing..."
                % (item.pageurl, item.number, item.title, item.type_))
        return

    download_item(item, extension, tries=tries)
Example #7
def download(item, db, tries):
    if item["href"] in db:
        path = db.get(item["href"])

    else:
        f = common.retrieve_m(config["clearspace-root"] + item["href"], tries=tries)
        doc = WikiDoc(f.read())
        f.close()

        path = doc.path + "/" + doc.filename

        if want(path):
            skip = False
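            # Skip the download when the local copy's mtime already matches the document's mtime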
            if os.path.exists(path):
                st = os.stat(path)
                if st.st_mtime == doc.mtime:
                    skip = True

            if not skip:
                common.mkdirs(doc.path)
                common.retrieve(config["clearspace-root"] + doc.filehref, path, force=True, tries=tries)
                common.mkro(path)
                os.utime(path, (doc.mtime, doc.mtime))

    updatedbs(db, keep, item["href"], path)
Example #8
def download_item_page(item, tries = 1):
    try:
        html = common.retrieve_m(item.pageurl, tries = tries)
    except urllib2.HTTPError, e:
        warn("can't load item page %s (#%u, %s, %s) (%s), continuing..." % \
                 (item.pageurl, item.number, item.title, item.type_, e))
        return
Example #9
def read_project_list(opener):
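    # Scrape the EMEA project list and queue an events-page fetch for each project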
    url = config["pt-root"] + "projects-emea/project-list"
    html = common.retrieve_m(url, opener = opener, tries = 10).read()
    html = lxml.html.fromstring(html)

    for _url in html.xpath("//a[text()='events']/@href"):
        _url = urlparse.urljoin(url, _url)
        q.put((read_events, opener, _url))
Example #10
def read_events(opener, url):
    try:
        html = common.retrieve_m(url, opener = opener, tries = 10).read()
    except urllib2.HTTPError, e:
        if e.code == 403:
            print >>sys.stderr, "WARNING: %s, continuing..." % e
            return
        else:
            raise
Example #11
def sync(query, keep):
    xml = common.retrieve_m(config["gsa-url"] + "?client=internal&output=xml&num=1000&filter=0&q=" + query, tries = 10)
    xml = lxml.etree.parse(xml)

    if int(xml.xpath("//M/text()")[0]) == 1000:
        raise Exception("search returned too many results")

    for result in xml.xpath("//U/text()"):
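        # Local destination is the result URL's host and path, with any '~' removed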
        dest = result.split("//")[1]
        dest = dest.replace("~", "")
        common.mkdirs(os.path.split(dest)[0])
        common.retrieve(result, dest, tries = 10)
        common.mkro(dest)
        keep.add(dest)
Example #12
def sync(query, keep):
    xml = common.retrieve_m(
        config["gsa-url"] +
        "?client=internal&output=xml&num=1000&filter=0&q=" + query,
        tries=10)
    xml = lxml.etree.parse(xml)

    if int(xml.xpath("//M/text()")[0]) == 1000:
        raise Exception("search returned too many results")

    for result in xml.xpath("//U/text()"):
        dest = result.split("//")[1]
        dest = dest.replace("~", "")
        common.mkdirs(os.path.split(dest)[0])
        common.retrieve(result, dest, tries=10)
        common.mkro(dest)
        keep.add(dest)
Example #13
def urls():
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

    login(opener)

    html = common.retrieve_m("https://rhn.redhat.com/rhn/software/channel/downloads/Download.do?cid=15989", opener = opener).read()
    html = lxml.html.fromstring(html)

    urls = html.xpath("//div[@class='bases']/div//a/@href")
    urls = [(url.split("?")[0].split("/")[-1], url) for url in urls]
    for url in sorted(urls, key = lambda url: url[0]):
        yield url

    urls = html.xpath("//div[@class='incrementals']/div//a/@href")
    urls = [(url.split("?")[0].split("/")[-1], url) for url in urls]
    for url in sorted(urls, key = lambda url: url[0]):
        yield url
Example #14
def download_item_page(item, tries = 1):
    try:
        html = common.retrieve_m(item.pageurl, tries = tries)
    except urllib.error.HTTPError as e:
        warn("can't load item page %s (#%u, %s, %s) (%s), continuing..." % \
                 (item.pageurl, item.number, item.title, item.type_, e))
        return

    xml = lxml.html.parse(html)

    try:
        if item.type_ == "Videos":
            item.set_dlurl(xml.xpath("//a[text()='OGG']/@href")[0])
            extension = ".ogg"
        else:
            item.set_dlurl(xml.xpath("//a[@class='ResourceFile']/@href")[0])
            extension = ".pdf"
    except IndexError:
        if item.type_ != "Other Resources":
            # "Other Resources" often contain a text article with no link
            warn("can't find item download link at %s (#%u, %s, %s), continuing..." % (item.pageurl, item.number, item.title, item.type_))
        return

    download_item(item, extension, tries = tries)
    db = common.DB(".sync-db")

    now = time.gmtime()

    for line in config["lists-sync"]:
        line = line.split(" ")
        
        url = line[0].rstrip("/")
        _list = url.split("/")[-1]

        credentials = None
        if len(line) == 3:
            credentials = urllib.urlencode(dict(zip(("username", "password"),
                                                    line[1:3])))

        index = common.retrieve_m(url, credentials)
        index_xml = lxml.html.parse(index).getroot()
        index.close()

        for href in index_xml.xpath("//a[substring-after(@href, '.') = 'txt.gz']/@href"):
            tm = time.strptime(href, "%Y-%B.txt.gz")
            path = "%s/%04u/%02u" % (_list, tm.tm_year, tm.tm_mon)

            if tm.tm_year < int(config["lists-start-year"]):
                break

            if not path in db or not os.path.isfile(path):
                common.mkdirs(os.path.split(path)[0])
                try:
                    f = common.retrieve_tmpfile(url + "/" + href, credentials)
                except urllib2.HTTPError, e:
Example #16
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    urllib.request.install_opener(opener)

    # Permit write of UTF-8 characters to stderr (required when piping output)
    if sys.stderr.encoding == None:
        sys.stderr = codecs.getwriter("UTF-8")(sys.stderr)

    threads = int(config["resourcelibrary-threads"])
    # When multi-threaded, silence progress meter
    if threads > 1:
        common.progress = lambda x, y: None
        common.progress_finish = lambda: None

    tries = 10
    # Initialise cookie store
    common.retrieve_m("http://www.redhat.com/resourcelibrary/results",
                      tries = tries)

    i = 0
    indexurl = "http://www.redhat.com/resourcelibrary/results?portal:componentId=bf73926d-2aa3-4b8b-bf8d-a1f6f56b8469&portal:type=action&actionType=orderBy&orderBy=Date-Desc&resultPerPage=100"
    while True:
        html = common.retrieve_m(indexurl, tries = tries)
        xml = lxml.html.parse(html)

        for indexitem in xml.xpath("//div[@id='sidebar-left-events']"):
            indexitem = copy.deepcopy(indexitem)

            i += 1
            item = Item()

            item.number = i
            item.type_ = indexitem.xpath("//b[text()='Type:']")[0].tail.strip()
Example #17
        urllib.request.HTTPCookieProcessor(cj))
    urllib.request.install_opener(opener)

    # Permit write of UTF-8 characters to stderr (required when piping output)
    if sys.stderr.encoding == None:
        sys.stderr = codecs.getwriter("UTF-8")(sys.stderr)

    threads = int(config["resourcelibrary-threads"])
    # When multi-threaded, silence progress meter
    if threads > 1:
        common.progress = lambda x, y: None
        common.progress_finish = lambda: None

    tries = 10
    # Initialise cookie store
    common.retrieve_m("http://www.redhat.com/resourcelibrary/results",
                      tries=tries)

    i = 0
    indexurl = "http://www.redhat.com/resourcelibrary/results?portal:componentId=bf73926d-2aa3-4b8b-bf8d-a1f6f56b8469&portal:type=action&actionType=orderBy&orderBy=Date-Desc&resultPerPage=100"
    while True:
        html = common.retrieve_m(indexurl, tries=tries)
        xml = lxml.html.parse(html)

        for indexitem in xml.xpath("//div[@id='sidebar-left-events']"):
            indexitem = copy.deepcopy(indexitem)

            i += 1
            item = Item()

            item.number = i
            item.type_ = indexitem.xpath("//b[text()='Type:']")[0].tail.strip()
Example #18
    for line in config["lists-sync"]:
        line = line.split(" ")

        url = line[0].rstrip("/")
        _list = url.split("/")[-1]

        if args["list"] and _list != args["list"]:
            continue

        credentials = None
        if len(line) == 3:
            credentials = urllib.parse.urlencode(
                dict(zip(("username", "password"), line[1:3]))).encode("utf-8")

        index = common.retrieve_m(url, credentials)
        index_xml = lxml.html.parse(index).getroot()
        index.close()

        for href in index_xml.xpath(
                "//a[substring-after(@href, '.') = 'txt.gz']/@href"):
            tm = time.strptime(href, "%Y-%B.txt.gz")
            path = "%s/%04u/%02u" % (_list, tm.tm_year, tm.tm_mon)

            if tm.tm_year < int(config["lists-start-year"]):
                break

            if not path in db or not os.path.isfile(path):
                common.mkdirs(os.path.split(path)[0])
                req = urllib.request.Request(url + "/" + href, credentials,
                                             {"Accept-Encoding": "gzip"})
Example #19
    threads = int(config["clearspace-threads"])
    if threads > 1:
        common.progress = lambda x, y: None
        common.progress_finish = lambda: None

    for i in range(threads):
        t = threading.Thread(target=worker, name=i)
        t.daemon = True
        t.start()

    keep = set()
    step = 50
    tries = 10
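    # Page through the presentations index 50 documents at a time, queueing each for the worker threads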
    for i in itertools.count(step=step):
        f = common.retrieve_m(
            config["clearspace-root"] + "/view-documents.jspa?start=%u&numResults=%u&filter=presentations" % (i, step),
            tries=tries,
        )
        index = DocIndex(f.read())
        f.close()

        for item in index.items:
            q.put((download, item, db, tries))

        if len(index.items) != step:
            break

    q.join()

    for dirpath, dirnames, filenames in os.walk(".", topdown=False):
        # remove local files which are no longer found in clearspace
        for f in filenames: