Example #1
def fetch(packages=[], path=os.path.curdir):
    """
    Fetch the given packages from the repository without installing them;
    the packages are only downloaded.
    @param packages: list of package names -> list_of_strings
    @param path: directory to download the packages into; defaults to
    the current working directory.
    """
    packagedb = pisi.db.packagedb.PackageDB()
    repodb = pisi.db.repodb.RepoDB()
    for name in packages:
        package, repo = packagedb.get_package_repo(name)
        ctx.ui.info(
            _("%s package found in %s repository") % (package.name, repo))
        uri = pisi.uri.URI(package.packageURI)
        output = os.path.join(path, uri.path())
        if os.path.exists(output) and package.packageHash == pisi.util.sha1_file(output):
            ctx.ui.warning(_("%s package already fetched") % uri.path())
            continue
        if uri.is_absolute_path():
            url = str(uri.path())
        else:
            url = os.path.join(os.path.dirname(repodb.get_repo_url(repo)),
                               str(uri.path()))

        fetcher.fetch_url(url, path, ctx.ui.Progress)
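
A minimal usage sketch for the function above, assuming it is exposed as pisi.api.fetch as in pisi releases (the package names and download directory are placeholders):

import os
import pisi.api

# Download two packages into /tmp/pkgs without installing them
# (the package names here are illustrative only).
download_dir = "/tmp/pkgs"
if not os.path.exists(download_dir):
    os.makedirs(download_dir)
pisi.api.fetch(["vim", "htop"], download_dir)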
Example #2
    def fetch_remote_file(self, url):
        dest = ctx.config.cached_packages_dir()
        self.filepath = os.path.join(dest, url.filename())

        if not os.path.exists(self.filepath):
            try:
                fetcher.fetch_url(url, dest, ctx.ui.Progress)
            except pisi.fetcher.FetchError:
                # Bug 3465
                if ctx.get_option('reinstall'):
                    raise Error(_("There was a problem while fetching '%s'.\nThe package "
                    "may have been upgraded. Please try to upgrade the package.") % url);
                raise
        else:
            ctx.ui.info(_('%s [cached]') % url.filename())
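
The method above only hits the network when the package is missing from the cache directory. A standalone sketch of the same cache-check pattern, using only the Python 2 standard library rather than pisi's fetcher (the helper name and URL handling are illustrative):

import os
import urllib

def fetch_cached(url, cache_dir):
    # Derive the local file name from the URL and skip the download
    # if the file is already present in the cache directory.
    filepath = os.path.join(cache_dir, os.path.basename(url))
    if not os.path.exists(filepath):
        urllib.urlretrieve(url, filepath)
    return filepath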
Example #3
    def read_uri(self, filename, repo=None):
        """Read PSPEC file"""

        self.filepath = filename
        url = URI(filename)
        if url.is_remote_file():
            from fetcher import fetch_url
            assert repo
            dest = os.path.join(ctx.config.index_dir(), repo)
            if not os.path.exists(dest):
                os.makedirs(dest)
            fetch_url(url, dest, ctx.ui.Progress)

            self.filepath = os.path.join(dest, url.filename())

        self.read(self.filepath)
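
The remote-vs-local dispatch above relies on URI.is_remote_file(). A rough stdlib equivalent of that check might look like the sketch below; this is an assumption about the semantics, not pisi's implementation:

from urlparse import urlparse

def is_remote_file(filename):
    # Treat anything with a known network scheme as remote.
    return urlparse(filename).scheme in ('http', 'https', 'ftp')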
Example #4
    def __init__(self, packagefn, mode='r'):
        self.filepath = packagefn
        url = URI(packagefn)

        if url.is_remote_file():
            from fetcher import fetch_url
            dest = ctx.config.packages_dir()
            self.filepath = join(dest, url.filename())
            
            # FIXME: exists is not enough; a sha1sum check is also needed
            #        once hashes are available in pisi-index.xml
            if not exists(self.filepath):
                fetch_url(url, dest, ctx.ui.Progress)
            else:
                ctx.ui.info(_('%s [cached]') % url.filename())
                
        self.impl = archive.ArchiveZip(self.filepath, 'zip', mode)
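
The FIXME above asks for a hash check in addition to the existence test. A minimal sketch of that verification, mirroring what pisi.util.sha1_file computes; the expected hash would have to come from pisi-index.xml once it carries one:

import hashlib

def sha1_matches(path, expected_sha1, blocksize=65536):
    # Hash the file in fixed-size blocks so large packages do not
    # have to be read into memory at once.
    digest = hashlib.sha1()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(blocksize), b''):
            digest.update(block)
    return digest.hexdigest() == expected_sha1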
Example #5
    def fetch_remote_file(self, url):
        dest = ctx.config.cached_packages_dir()
        self.filepath = os.path.join(dest, url.filename())

        if not os.path.exists(self.filepath):
            try:
                fetcher.fetch_url(url, dest, ctx.ui.Progress)
            except pisi.fetcher.FetchError:
                # Bug 3465
                if ctx.get_option('reinstall'):
                    raise Error(
                        _("There was a problem while fetching '%s'.\nThe package "
                          "may have been upgraded. Please try to upgrade the package."
                          ) % url)
                raise
        else:
            ctx.ui.info(_('%s [cached]') % url.filename())
Example #6
def fetch(packages=[], path=os.path.curdir):
    """
    Fetch the given packages from the repository without installing them;
    the packages are only downloaded.
    @param packages: list of package names -> list_of_strings
    @param path: directory to download the packages into; defaults to
    the current working directory.
    """
    packagedb = pisi.db.packagedb.PackageDB()
    repodb = pisi.db.repodb.RepoDB()
    for name in packages:
        package, repo = packagedb.get_package_repo(name)
        uri = pisi.uri.URI(package.packageURI)
        if uri.is_absolute_path():
            url = str(uri.path())
        else:
            url = os.path.join(os.path.dirname(repodb.get_repo_url(repo)), str(uri.path()))

        fetcher.fetch_url(url, path, ctx.ui.Progress)
Example #7
def fetch(packages=[], path=os.path.curdir):
    """
    Fetch the given packages from the repository without installing them;
    the packages are only downloaded.
    @param packages: list of package names -> list_of_strings
    @param path: directory to download the packages into; defaults to
    the current working directory.
    """
    packagedb = pisi.db.packagedb.PackageDB()
    repodb = pisi.db.repodb.RepoDB()
    for name in packages:
        package, repo = packagedb.get_package_repo(name)
        uri = pisi.uri.URI(package.packageURI)
        if uri.is_absolute_path():
            url = str(uri.path())
        else:
            url = os.path.join(os.path.dirname(repodb.get_repo_url(repo)),
                               str(uri.path()))

        fetcher.fetch_url(url, path, ctx.ui.Progress)
Example #8
    def fetch_remote_file(self, url):
        from fetcher import fetch_url
        dest = ctx.config.packages_dir()
        self.filepath = join(dest, url.filename())

        sha1sum = None
        if exists(self.filepath):
            sha1sum = util.sha1_file(self.filepath)

        name, version = util.parse_package_name(basename(self.filepath))
        if sha1sum != ctx.packagedb.get_package(name).packageHash:
            try:
                fetch_url(url, dest, ctx.ui.Progress)
            except pisi.fetcher.FetchError:
                # Bug 3465
                if ctx.get_option('reinstall'):
                    raise Error(_("There was a problem while fetching '%s'.\nThe package "
                    "may have been upgraded. Please try to upgrade the package.") % url);
                raise
        else:
            ctx.ui.info(_('%s [cached]') % url.filename())
Example #9
    def read(self, filename, repo=None):
        """Read PSPEC file"""

        self.filepath = filename
        url = URI(filename)
        if url.is_remote_file():
            from fetcher import fetch_url

            dest = os.path.join(ctx.config.index_dir(), repo)
            if not os.path.exists(dest):
                os.makedirs(dest)
            fetch_url(url, dest, ctx.ui.Progress)

            self.filepath = os.path.join(dest, url.filename())

        self.readxml(self.filepath)

        # find all binary packages
        packageElts = self.getAllNodes("Package")
        self.packages = [metadata.PackageInfo(p) for p in packageElts]
        
        self.unlink()
Example #10
def fetch(packages=[], path=os.path.curdir):
    """
    Fetch the given packages from the repository without installing them;
    the packages are only downloaded.
    @param packages: list of package names -> list_of_strings
    @param path: directory to download the packages into; defaults to
    the current working directory.
    """
    packagedb = pisi.db.packagedb.PackageDB()
    repodb = pisi.db.repodb.RepoDB()
    for name in packages:
        package, repo = packagedb.get_package_repo(name)
        ctx.ui.info(_("%s package found in %s repository") % (package.name, repo))
        uri = pisi.uri.URI(package.packageURI)
        output = os.path.join(path, uri.path())
        if os.path.exists(output) and package.packageHash == pisi.util.sha1_file(output):
            ctx.ui.warning(_("%s package already fetched") % uri.path())
            continue
        if uri.is_absolute_path():
            url = str(uri.path())
        else:
            url = os.path.join(os.path.dirname(repodb.get_repo_url(repo)), str(uri.path()))

        fetcher.fetch_url(url, path, ctx.ui.Progress)
Example #11
    def feed_add(self, request):
        form_message = ''
        groups = get_groups(self.user)

        # URL could be passed via a GET (bookmarklet) or POST
        self_link = request.params.get('self_link', '').strip()

        if request.method == 'GET':
            return self.respond_with_template('_feed_add_wizard_1.html', locals())

        if not is_valid_url(self_link):
            form_message = u'ERROR Error, specify a valid web address'
            return self.respond_with_template('_feed_add_wizard_1.html', locals())
        response = fetcher.fetch_url(self_link)
        if response:
            if response.status_code not in fetcher.POSITIVE_STATUS_CODES:
                form_message = u'ERROR Error, feed host returned: %s' % filters.status_title(
                    response.status_code)
                return self.respond_with_template('_feed_add_wizard_1.html', locals())
        else:
            form_message = u'ERROR Error, a network error occurred'
            return self.respond_with_template('_feed_add_wizard_1.html', locals())

        group_id = int(request.POST.get('group', 0))
        if group_id:
            group = Group.get(Group.id == group_id)
        else:
            group = Group.get(Group.title == Group.DEFAULT_GROUP)

        fetcher.load_plugins()
        trigger_event('fetch_started')
        feed = Feed()
        feed.self_link = self_link
        feed = fetcher.add_feed(feed, fetch_icon=True, add_entries=True)
        trigger_event('fetch_done', [feed])
        subscription = fetcher.add_subscription(feed, self.user, group)
        if subscription:
            self.alert_message = u'SUCCESS Feed has been added to <i>%s</i> group' % group.title
        else:
            self.alert_message = u'INFO Feed is already in <i>%s</i> group' % group.title
        return self.respond_with_script('_modal_done.js', {'location': '%s/?feed=%d' % (request.application_url, feed.id)})
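
The handler above validates the submitted address with is_valid_url before fetching. A minimal sketch of the kind of check such a helper might perform; the real implementation may well do more (Python 2 urlparse is assumed, matching the codebase):

from urlparse import urlparse

def is_valid_url(value):
    # Accept only absolute http(s) addresses with a host component.
    parts = urlparse(value)
    return parts.scheme in ('http', 'https') and bool(parts.netloc)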
Example #12
def extract_privacy_url(url):
    try:
        data = fetch_url(url)
        soup = BeautifulSoup(data)
    except Exception as e:
        return "%s" % e

    # Collect every anchor whose link text mentions "privacy".
    a_list = soup.findAll("a")
    lst = []
    for node in a_list:
        if node.get_text().lower().find("privacy") != -1:
            lst.append(node)
    if len(lst) == 1:
        return lst[0]

    # Several candidates: prefer the one that looks like a policy page.
    for node in lst:
        if node.get_text().lower().find("policy") != -1 or \
           node.get_text().lower().find("notice") != -1 or \
           node.get_text().lower().find("promise") != -1:
            return node
    return None
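
Illustrative usage of the helper above (the URL is a placeholder; note that on a fetch or parse error the function returns the error message as a string rather than a tag, hence the attrs check):

link = extract_privacy_url("http://www.example.com/")
if link is not None and hasattr(link, "attrs"):
    print link.attrs.get("href")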
Example #13
    def process_feed(self, overflow, overflow_reason):
        # Sync pull down the latest feeds
        resp = yield fetch_url(self.feed_url, user_agent=self.user_agent)
        parsed_feed = json.loads(resp.content)

        posts = parsed_feed.get('data', [])
        new_entries = 0
        for post in posts:
            key = ndb.Key(Entry, post.get('id'), parent=self.key)
            entry = yield key.get_async()
            if not entry:
                standard_resolution = post.get('images', {}).get('standard_resolution')
                kwargs = {}
                kwargs['image_url'] = standard_resolution.get('url')
                kwargs['image_width'] = standard_resolution.get('width')
                kwargs['image_height'] = standard_resolution.get('height')
                low_resolution = post.get('images', {}).get('low_resolution')
                kwargs['thumbnail_image_url'] = low_resolution.get('url')
                kwargs['thumbnail_image_width'] = low_resolution.get('width')
                kwargs['thumbnail_image_height'] = low_resolution.get('height')
                caption = post.get('caption')
                if not caption:
                    kwargs['title'] = '.'
                else:
                    kwargs['title'] = caption.get('text', '')
                kwargs['link'] = post.get('link')
                kwargs['feed_item'] = post
                kwargs['creating'] = False
                if overflow:
                    kwargs['overflow'] = overflow
                    kwargs['overflow_reason'] = overflow_reason
                    kwargs['published'] = True

                entry = Entry(key=key, guid=post.get('id'), **kwargs)
                new_entries += 1
                yield entry.put_async()

        raise ndb.Return((self, new_entries))
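
The bare yield expressions and raise ndb.Return above indicate this method is meant to run as a Google App Engine ndb tasklet. A hedged sketch of how a caller might drive it; the decorator placement, the wrapper name, and the None arguments are assumptions:

from google.appengine.ext import ndb

@ndb.tasklet
def poll_feed(feed):
    # process_feed yields (feed, new_entries) via ndb.Return.
    feed, new_entries = yield feed.process_feed(None, None)
    raise ndb.Return(new_entries)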
Example #14
def crawl_alexa_category(category):
    URLFORMAT = "http://www.alexa.com/topsites/category%s/Top/%s"
    urls = []
    # Note: the first-page branch below only triggers when the range
    # starts at 0; with xrange(10, 20) only pages 10-19 are crawled.
    for i in xrange(10, 20):
        if i == 0:
            url = URLFORMAT % ('', category)
        else:
            url = URLFORMAT % (";%d" % i, category)
        try:
            data = fetch_url(url)
            soup = BeautifulSoup(data)
        except Exception:
            continue
        # Site links on the listing page point at /siteinfo/<domain>.
        links = soup.findAll("a")
        for link in links:
            if link.has_attr("href"):
                url = link.attrs["href"]
                offset = url.find("/siteinfo/")
                if offset != -1:
                    urls.append(url[len("/siteinfo/"):])

    urls = ["http://%s" % u for u in urls]
    return urls
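
Illustrative usage (the category path is a placeholder, and the Alexa endpoint may no longer serve these pages):

for site in crawl_alexa_category("/Computers"):
    print site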
Example #15
    def fetch(self, appendDest=""):
        from fetcher import fetch_url

        ctx.ui.info(_("Fetching %s") % self.url.uri)
        dest = join(self.dest, appendDest)
        fetch_url(self.url, dest)
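
An equivalent direct call to the module-level helper (the URL and destination are placeholders; depending on the pisi version the import is from fetcher or pisi.fetcher):

from pisi.fetcher import fetch_url

# A plain string URL is assumed to be accepted here; if the installed
# pisi version requires it, wrap the string in pisi.uri.URI first.
fetch_url("http://packages.example.org/foo-1.0.tar.gz", "/tmp/src")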