def write_details_to_csv(csv_file, statistic, data):
    """
        Write the detail rows for a statistic to a CSV file
        (relies on location_list, split and s3_str from the surrounding scope)
    """
    cnt = 0
    writerow = csv_file.writerow
    for (location, details) in data.items():
        loc = location_list[location]
        for key in sorted(details.keys()):
            row = details[key]
            source = row[2]
            source_url = ""
            url = urlparse.urlparse(source)
            if url[0] != "":
                # Source is a URL: keep the full link and use the
                # unescaped file name as the display name
                source_url = source
                (head, tail) = split(url[2])
                source = tail.replace("%20", " ")
            cnt += 1
            writerow([statistic,
                      row[0],
                      loc[0],
                      loc[1],
                      loc[2],
                      loc[3],
                      key,
                      s3_str(source),
                      row[1],
                      source_url,
                      ])
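# The URL handling above keeps the full link as source_url and uses the
# unescaped last path component as the display name. A minimal standalone
# sketch of that step, using only the standard library; the helper name and
# the example URL below are illustrative, not part of the module.
from os.path import split as path_split
try:
    from urllib import parse as urlparse_mod   # Python 3
except ImportError:
    import urlparse as urlparse_mod             # Python 2

def source_name_and_url(source):
    # If the source parses as a URL, keep it as source_url and derive a
    # human-readable name from its (unescaped) file name
    source_url = ""
    url = urlparse_mod.urlparse(source)
    if url[0] != "":
        source_url = source
        (head, tail) = path_split(url[2])
        source = tail.replace("%20", " ")
    return source, source_url

# e.g. ("Situation Report.pdf", "http://example.org/docs/Situation%20Report.pdf")
print(source_name_and_url("http://example.org/docs/Situation%20Report.pdf"))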
def parse_rss_2_cap(message):
    """
        Parse RSS Feeds into the CAP Module
    """

    db = current.db
    s3db = current.s3db
    table = s3db.msg_rss
    message_id = message.message_id
    record = db(table.message_id == message_id).select(table.id,
                                                       table.channel_id,
                                                       table.title,
                                                       table.from_address,
                                                       table.body,
                                                       table.date,
                                                       table.location_id,
                                                       table.author,
                                                       limitby=(0, 1)
                                                       ).first()
    if not record:
        return

    pstable = s3db.msg_parsing_status
    # not adding (pstable.channel_id == record.channel_id) to the query
    # because two channels (http://host.domain/eden/cap/public.rss and
    # http://host.domain/eden/cap/alert.rss) may contain a common url,
    # eg. http://host.domain/eden/cap/public/xx.cap
    pquery = (pstable.message_id == message_id)
    prows = db(pquery).select(pstable.id,
                              pstable.is_parsed)
    for prow in prows:
        if prow.is_parsed:
            return

    alert_table = s3db.cap_alert
    info_table = s3db.cap_info

    # Is this an Update or a Create?
    # @ToDo: Use guid?
    # Use Body
    body = record.body or record.title
    query = (info_table.description == body)
    exists = db(query).select(info_table.id,
                              limitby=(0, 1)
                              ).first()

    author = record.author
    if author:
        ptable = s3db.pr_person
        # https://code.google.com/p/python-nameparser/
        from nameparser import HumanName
        name = HumanName(author)
        first_name = name.first
        middle_name = name.middle
        last_name = name.last
        query = (ptable.first_name == first_name) & \
                (ptable.middle_name == middle_name) & \
                (ptable.last_name == last_name)
        pexists = db(query).select(ptable.id,
                                   limitby=(0, 1)
                                   ).first()
        if pexists:
            person_id = pexists.id
        else:
            person_id = ptable.insert(first_name=first_name,
                                      middle_name=middle_name,
                                      last_name=last_name)
            s3db.update_super(ptable, {"id": person_id})
    else:
        person_id = None

    if exists:
        # @ToDo: Use XSLT
        info_id = exists.id
        db(info_table.id == info_id).update(headline=record.title,
                                            description=body,
                                            created_on=record.date,
                                            #location_id = record.location_id,
                                            #person_id = person_id,
                                            )
    else:
        # Embedded link
        url = record.from_address
        import_xml = s3db.resource("cap_alert").import_xml
        stylesheet = os.path.join(current.request.folder, "static", "formats",
                                  "cap", "import.xsl")
        try:
            file = fetch(url)
        except HTTPError as e:
            import base64
            rss_table = s3db.msg_rss_channel
            query = (rss_table.channel_id == record.channel_id)
            channel = db(query).select(rss_table.date,
                                       rss_table.etag,
                                       rss_table.url,
                                       rss_table.username,
                                       rss_table.password,
                                       limitby=(0, 1)).first()
            username = channel.username
            password = channel.password
            if e.code == 401 and username and password:
                request = urllib2.Request(url)
                base64string = base64.encodestring("%s:%s" %
                                                   (username, password))
                request.add_header("Authorization",
                                   "Basic %s" % base64string)
            else:
                request = None

            try:
                file = urlopen(request).read() if request else fetch(url)
            except HTTPError as e:
                # Check if there are links to look into
                ltable = s3db.msg_rss_link
                query_ = (ltable.rss_id == record.id) & (ltable.deleted != True)
                rows_ = db(query_).select(ltable.type,
                                          ltable.url)
                url_format = "{uri.scheme}://{uri.netloc}/".format
                url_domain = url_format(uri=urlparse.urlparse(url))
                for row_ in rows_:
                    url = row_.url
                    if url and row_.type == "application/cap+xml" and \
                       url_domain == url_format(uri=urlparse.urlparse(url)):
                        # Same domain, so okay to use same username/pwd combination
                        if e.code == 401 and username and password:
                            request = urllib2.Request(url)
                            request.add_header("Authorization",
                                               "Basic %s" % base64string)
                        else:
                            request = None
                        try:
                            file = urlopen(request).read() if request else fetch(url)
                        except HTTPError as e:
                            current.log.error("Getting content from link failed: %s" % e)
                        else:
                            # Import via XSLT
                            import_xml(StringIO(file),
                                       stylesheet=stylesheet,
                                       ignore_errors=True)
            else:
                # Import via XSLT
                import_xml(StringIO(file),
                           stylesheet=stylesheet,
                           ignore_errors=True)
        else:
            # Public Alerts
            # eg. http://host.domain/eden/cap/public/xx.cap
            # Import via XSLT
            import_xml(StringIO(file),
                       stylesheet=stylesheet,
                       ignore_errors=True)

    # No Reply
    return
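# parse_rss_2_cap() retries a failed fetch with HTTP Basic Auth when the
# server answers 401 and the channel has stored credentials. A minimal
# standalone sketch of that retry pattern, assuming only the standard
# library; the function name and its arguments are illustrative.
import base64
try:
    from urllib.request import Request, urlopen      # Python 3
    from urllib.error import HTTPError
except ImportError:
    from urllib2 import Request, urlopen, HTTPError  # Python 2

def fetch_with_basic_auth(url, username=None, password=None):
    # First try anonymously; on a 401, retry once with Basic Auth if
    # credentials are available (mirrors the channel-credential retry above)
    try:
        return urlopen(url).read()
    except HTTPError as e:
        if e.code != 401 or not (username and password):
            raise
        request = Request(url)
        token = base64.b64encode(("%s:%s" % (username, password)).encode("utf-8"))
        request.add_header("Authorization", "Basic %s" % token.decode("ascii"))
        return urlopen(request).read()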
def fetch_cap(cls, entry):
    """
        Fetch and parse the CAP-XML source for an RSS/Atom feed entry

        @param entry: the RSS/Atom feed entry (msg_rss Row), containing:
                      - id
                      - channel_id
                      - from_address

        @returns: tuple (url, tree, version, error)
                  - url = the URL of the CAP-XML source used
                  - tree = ElementTree of the CAP source
                  - version = the detected CAP version
                  - error = error message if unsuccessful, else None
    """

    db = current.db
    s3db = current.s3db

    AlertImporter = s3db.cap_ImportAlert

    # Get the URLs for all <link>s in this entry which are marked as cap+xml
    ltable = s3db.msg_rss_link
    query = (ltable.rss_id == entry.id) & \
            (ltable.type == "application/cap+xml") & \
            (ltable.deleted == False)
    links = db(query).select(ltable.url)
    urls = [link.url for link in links if link.url]

    # Add the main <link> of the entry (=from_address) as fallback
    if entry.from_address:
        urls.append(entry.from_address)

    # Simple domain formatter for URLs
    url_format = "{uri.scheme}://{uri.netloc}/".format

    # Get domain/username/password for the channel
    ctable = s3db.msg_rss_channel
    query = (ctable.channel_id == entry.channel_id) & \
            (ctable.deleted == False)
    channel = db(query).select(ctable.url,
                               ctable.username,
                               ctable.password,
                               limitby=(0, 1),
                               ).first()
    if channel:
        channel_domain = url_format(uri=urlparse.urlparse(channel.url))
        username = channel.username
        password = channel.password
    else:
        channel_domain = None
        username = password = None

    # Iterate over <link> URLs to find the CAP source
    errors = []
    cap_url = version = tree = None
    for url in urls:

        error = None
        current.log.debug("Fetching CAP-XML from %s" % url)

        # If same domain as channel, use channel credentials for auth
        if channel_domain and url_format(uri=urlparse.urlparse(url)) == channel_domain:
            opener = AlertImporter.opener(url,
                                          username=username,
                                          password=password)
        else:
            opener = AlertImporter.opener(url)

        # Fetch the link content
        try:
            content = opener.open(url)
        except HTTPError as e:
            # HTTP status
            error = "HTTP %s: %s" % (e.code, e.read())
        except URLError as e:
            # URL Error (network error)
            error = "CAP source unavailable (%s)" % e.reason
        except Exception:
            # Other error (local error)
            import sys
            error = sys.exc_info()[1]
        else:
            # Try parse
            tree, version, error = AlertImporter.parse_cap(content)

        if tree:
            # XML source found => proceed to import
            cap_url = url
            break
        elif error:
            errors.append(error)
        else:
            errors.append("Not a valid CAP source: %s" % url)

    if errors:
        error = "\n".join(errors)
    else:
        error = None

    return cap_url, tree, version, error
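# fetch_cap() only reuses the channel's username/password when a <link> URL
# is on the same scheme and host as the channel itself. A minimal sketch of
# that same-domain check; the helper name and the example URLs below are
# illustrative.
try:
    from urllib.parse import urlparse   # Python 3
except ImportError:
    from urlparse import urlparse       # Python 2

url_format = "{uri.scheme}://{uri.netloc}/".format

def same_domain(url_a, url_b):
    # True when both URLs share scheme and network location,
    # i.e. when it would be safe to reuse the channel's credentials
    return url_format(uri=urlparse(url_a)) == url_format(uri=urlparse(url_b))

print(same_domain("http://host.domain/eden/cap/alert.rss",
                  "http://host.domain/eden/cap/public/xx.cap"))   # True
print(same_domain("http://host.domain/eden/cap/alert.rss",
                  "http://other.site/cap/xx.cap"))                # False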
def notify(cls, resource_id):
    """
        Asynchronous task to notify a subscriber about updates,
        runs a POST?format=msg request against the subscribed
        controller which extracts the data and renders and sends
        the notification message (see send()).

        @param resource_id: the pr_subscription_resource record ID
    """

    _debug = current.log.debug
    _debug("S3Notifications.notify(resource_id=%s)" % resource_id)

    db = current.db
    s3db = current.s3db

    stable = s3db.pr_subscription
    rtable = db.pr_subscription_resource
    ftable = s3db.pr_filter

    # Extract the subscription data
    join = stable.on(rtable.subscription_id == stable.id)
    left = ftable.on(ftable.id == stable.filter_id)
    # @todo: should not need rtable.resource here
    row = db(rtable.id == resource_id).select(stable.id,
                                              stable.pe_id,
                                              stable.frequency,
                                              stable.notify_on,
                                              stable.method,
                                              stable.email_format,
                                              stable.attachment,
                                              rtable.id,
                                              rtable.resource,
                                              rtable.url,
                                              rtable.last_check_time,
                                              ftable.query,
                                              join=join,
                                              left=left).first()
    if not row:
        return True

    s = getattr(row, "pr_subscription")
    r = getattr(row, "pr_subscription_resource")
    f = getattr(row, "pr_filter")

    # Create a temporary token to authorize the lookup request
    auth_token = str(uuid4())

    # Store the auth_token in the subscription record
    r.update_record(auth_token=auth_token)
    db.commit()

    # Construct the send-URL
    public_url = current.deployment_settings.get_base_public_url()
    lookup_url = "%s/%s/%s" % (public_url,
                               current.request.application,
                               r.url.lstrip("/"))

    # Break up the URL into its components
    purl = list(urlparse.urlparse(lookup_url))

    # Subscription parameters
    # Date (must ensure we pass to REST as tz-aware)
    last_check_time = s3_encode_iso_datetime(r.last_check_time)
    query = {"subscription": auth_token, "format": "msg"}
    if "upd" in s.notify_on:
        query["~.modified_on__ge"] = "%sZ" % last_check_time
    else:
        query["~.created_on__ge"] = "%sZ" % last_check_time

    # Filters
    if f.query:
        from .s3filter import S3FilterString
        resource = s3db.resource(r.resource)
        fstring = S3FilterString(resource, f.query)
        for k, v in fstring.get_vars.items():
            if v is not None:
                if k in query:
                    value = query[k]
                    if type(value) is list:
                        value.append(v)
                    else:
                        query[k] = [value, v]
                else:
                    query[k] = v
        query_nice = s3_unicode(fstring.represent())
    else:
        query_nice = None

    # Add subscription parameters and filters to the URL query, and
    # put the URL back together
    query = urlencode(query)
    if purl[4]:
        query = "&".join((purl[4], query))
    page_url = urlparse.urlunparse([purl[0], # scheme
                                    purl[1], # netloc
                                    purl[2], # path
                                    purl[3], # params
                                    query,   # query
                                    purl[5], # fragment
                                    ])

    # Serialize data for send (avoid second lookup in send)
    data = json.dumps({"pe_id": s.pe_id,
                       "notify_on": s.notify_on,
                       "method": s.method,
                       "email_format": s.email_format,
                       "attachment": s.attachment,
                       "resource": r.resource,
                       "last_check_time": last_check_time,
                       "filter_query": query_nice,
                       "page_url": lookup_url,
                       "item_url": None,
                       })

    # Send the request
    _debug("Requesting %s" % page_url)
    req = urllib2.Request(page_url, data=data.encode("utf-8"))
    req.add_header("Content-Type", "application/json")
    success = False
    try:
        response = json.loads(urlopen(req).read())
        message = response["message"]
        if response["status"] == "success":
            success = True
    except HTTPError as e:
        message = ("HTTP %s: %s" % (e.code, e.read()))
    except:
        exc_info = sys.exc_info()[:2]
        message = ("%s: %s" % (exc_info[0].__name__, exc_info[1]))
    _debug(message)

    # Update time stamps and unlock, invalidate auth token
    intervals = s3db.pr_subscription_check_intervals
    interval = datetime.timedelta(minutes=intervals.get(s.frequency, 0))

    if success:
        last_check_time = datetime.datetime.utcnow()
        next_check_time = last_check_time + interval
        r.update_record(auth_token=None,
                        locked=False,
                        last_check_time=last_check_time,
                        next_check_time=next_check_time)
    else:
        r.update_record(auth_token=None,
                        locked=False)
    db.commit()

    # Done
    return message
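# notify() rebuilds the lookup URL by splitting it with urlparse, merging the
# subscription parameters into any existing query string, and joining it back
# together with urlunparse. A minimal standalone sketch of that step; the
# helper name and the example URL/parameters below are illustrative.
try:
    from urllib.parse import urlparse, urlunparse, urlencode   # Python 3
except ImportError:
    from urlparse import urlparse, urlunparse                  # Python 2
    from urllib import urlencode

def add_query_params(url, params):
    # Split the URL, append the encoded parameters to any existing query
    # string, and put the URL back together
    purl = list(urlparse(url))
    query = urlencode(params)
    if purl[4]:
        query = "&".join((purl[4], query))
    purl[4] = query
    return urlunparse(purl)

# e.g. ".../dvr/case?format=msg&~.modified_on__ge=2024-01-01T00%3A00%3A00Z"
print(add_query_params("http://host.domain/eden/dvr/case",
                       {"format": "msg",
                        "~.modified_on__ge": "2024-01-01T00:00:00Z"}))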