def extract_hit_url(row):
    """
    Extract the external question URL from XML encoded Question.

    If not an ExternalQuestion, fail and return original data
    """
    try:
        row_dict = parse_xml(row)
        if 'ExternalQuestion' in row_dict:
            return row_dict['ExternalQuestion']['ExternalURL']
        elif 'HTMLQuestion' in row_dict:
            return row_dict['HTMLQuestion']['HTMLContent']
        else:
            return row
    except KeyError:
        return row
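
For context, parse_xml is not defined in the snippet above; assuming it behaves like xmltodict.parse (elements become nested dicts keyed by tag name), a minimal, hypothetical usage sketch could look like this:

import xmltodict

def parse_xml(text):
    # Assumed stand-in for the parse_xml helper used above.
    return xmltodict.parse(text)

sample = ("<ExternalQuestion>"
          "<ExternalURL>https://example.com/task</ExternalURL>"
          "<FrameHeight>400</FrameHeight>"
          "</ExternalQuestion>")

print(extract_hit_url(sample))  # -> https://example.com/task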
Example 2
def dump_external_resources_from_xml(path):
    def find_external(tree, key=None):
        # Recursively walk the parsed XML tree, yielding every text value
        # that looks like an external resource reference.
        if isinstance(tree, dict):
            for key, value in tree.iteritems():
                for found in find_external(value, key=key):
                    yield found
        elif isinstance(tree, list):
            for item in tree:
                for found in find_external(item, key=key):
                    yield found
        elif isinstance(tree, unicode):
            text = tree.encode('utf-8')
            for find in (match_fileurl, match_filename):
                found = find(key, text)
                if found:
                    yield found

    with open(path) as fd:
        content = fd.read()
        tree = parse_xml(content)
        for found in find_external(tree):
            # Copy anything that does not already live under the data directory.
            if not found.startswith(config.gs_data_dir):
                copy_external_resource(found)
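
The function above depends on several names that are not part of the snippet: parse_xml, match_fileurl, match_filename, config.gs_data_dir and copy_external_resource. Purely as an assumption for illustration, the two matchers might look roughly like this; each takes the element or attribute name (key) and its text, and returns the external reference if one is found:

import re

_FILEURL_RE = re.compile(r'https?://\S+', re.IGNORECASE)
_FILENAME_RE = re.compile(r'\S+\.(?:png|jpe?g|gif|svg|pdf)\Z', re.IGNORECASE)

def match_fileurl(key, text):
    # Hypothetical matcher: return the first URL-looking value in the text.
    match = _FILEURL_RE.search(text)
    return match.group(0) if match else None

def match_filename(key, text):
    # Hypothetical matcher: return the text if it looks like a plain file name.
    match = _FILENAME_RE.search(text)
    return match.group(0) if match else None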
Example 3
    def __call__(self, *credential, **payload):
        """Perform the RPC call while following CCP's caching guidelines."""

        if len(credential) > 1:
            raise Exception("The only positional parameter allowed is the credentials object.")

        now = datetime.utcnow()
        uri = self.uri(self.name)

        # Define the keyID/vCode API key arguments, if we have credentials.
        if credential:
            payload['keyID'] = credential[0].key
            payload['vCode'] = credential[0].code

        # Hash the arguments in a reliable way by converting to text in a way which sorts the keys.
        payload_hash = sha256(EnhancedBencode().encode(payload)).hexdigest()

        # Examine the cache.
        cache = CachedAPIValue.objects(
                key = payload.get('keyID', None),
                name = self.name,
                arguments = payload_hash,
                expires__gte = now
            ).first()

        if cache:
            return XMLBunch(cache.result)

        # Actually perform the query if a cached version could not be found.
        response = requests.post(uri, data=payload or None)
        response.raise_for_status()

        # We don't want the initial XML prefix.  We should still check it, though.
        prefix, _, data = response.text.partition('\n')

        if prefix.strip() != "<?xml version='1.0' encoding='UTF-8'?>":
            raise Exception("Data returned doesn't seem to be XML!")

        data = XMLBunch(parse_xml(data.strip())).eveapi
        result = data.result

        if 'rowset' in result:
            restruct = XMLBunch()

            for rowset in (XMLBunch(i) for i in (result.rowset if isinstance(result.rowset, list) else [result.rowset])):
                restruct[rowset['@name']] = []
                for row in (XMLBunch(i) for i in (rowset.row if isinstance(rowset.row, list) else [rowset.row])):
                    restruct[rowset['@name']].append(row)

            result.rowset = restruct

        # Upsert (update if exists, create if it doesn't) the cache value.
        CachedAPIValue.objects(
                key = payload.get('keyID', None),
                name = self.name,
                arguments = payload_hash
            ).update_one(
                upsert = True,
                set__expires = datetime.strptime(data.cachedUntil, "%Y-%m-%d %H:%M:%S"),
                set__result = result
            )

        return result
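
EnhancedBencode is not shown in this snippet; the comment only says the payload is serialized "in a way which sorts the keys" so that identical arguments always hash identically. A hedged stand-in for the same idea, using json.dumps with sorted keys instead of bencoding:

from hashlib import sha256
import json

def payload_fingerprint(payload):
    # Serialize with sorted keys so the same arguments always produce the same
    # hash, regardless of dict ordering (stand-in for EnhancedBencode().encode).
    canonical = json.dumps(payload, sort_keys=True, separators=(',', ':'))
    return sha256(canonical.encode('utf-8')).hexdigest()

# Both orderings yield the same cache key.
assert payload_fingerprint({'keyID': 1, 'vCode': 'abc'}) == \
       payload_fingerprint({'vCode': 'abc', 'keyID': 1})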
Example 4
    def __call__(self, *credential, **payload):
        """Perform the RPC call while following CCP's caching guidelines."""

        if len(credential) > 1:
            raise Exception(
                "The only positional parameter allowed is the credentials object."
            )

        now = datetime.utcnow()
        uri = self.uri(self.name)

        # Define the keyID/vCode API key arguments, if we have credentials.
        if credential:
            payload['keyID'] = credential[0].key
            payload['vCode'] = credential[0].code

        # Hash the arguments in a reliable way by converting to text in a way which sorts the keys.
        payload_hash = sha256(EnhancedBencode().encode(payload)).hexdigest()

        # Examine the cache.
        cache = CachedAPIValue.objects(key=payload.get('keyID', None),
                                       name=self.name,
                                       arguments=payload_hash,
                                       expires__gte=now).first()

        if cache:
            return XMLBunch(cache.result)

        # Actually perform the query if a cached version could not be found.
        response = requests.post(uri, data=payload or None)
        response.raise_for_status()

        # We don't want the initial XML prefix.  We should still check it, though.
        prefix, _, data = response.text.partition('\n')

        if prefix.strip() != "<?xml version='1.0' encoding='UTF-8'?>":
            raise Exception("Data returned doesn't seem to be XML!")

        data = XMLBunch(parse_xml(data.strip())).eveapi
        result = data.result

        if 'rowset' in result and 'row' in result.rowset:
            restruct = XMLBunch()

            for rowset in (XMLBunch(i) for i in (result.rowset if isinstance(
                    result.rowset, list) else [result.rowset])):
                restruct[rowset['@name']] = []
                for row in (XMLBunch(i) for i in (rowset.row if isinstance(
                        rowset.row, list) else [rowset.row])):
                    restruct[rowset['@name']].append(row)

            result.rowset = restruct

        # Upsert (update if exists, create if it doesn't) the cache value.
        CachedAPIValue.objects(key=payload.get('keyID', None),
                               name=self.name,
                               arguments=payload_hash).update_one(
                                   upsert=True,
                                   set__expires=datetime.strptime(
                                       data.cachedUntil, "%Y-%m-%d %H:%M:%S"),
                                   set__result=result)

        return result
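
The rowset-handling block in both versions normalizes the parsed <rowset> elements: the XML parser returns a single rowset (or row) as a dict but several as a list, and the code re-keys everything by the rowset's @name attribute. The same idea with plain dicts, as an illustrative sketch:

def normalize_rowsets(result):
    # Re-key parsed rowsets by their @name attribute, always as lists of rows.
    rowsets = result.get('rowset')
    if rowsets is None:
        return result
    if not isinstance(rowsets, list):   # a single rowset parses as a dict
        rowsets = [rowsets]
    restruct = {}
    for rowset in rowsets:
        rows = rowset.get('row', [])
        if not isinstance(rows, list):  # a single row parses as a dict, too
            rows = [rows]
        restruct[rowset['@name']] = rows
    result['rowset'] = restruct
    return result

parsed = {'rowset': {'@name': 'characters',
                     'row': {'@name': 'Pilot One', '@characterID': '123'}}}
print(normalize_rowsets(parsed)['rowset']['characters'])
# -> [{'@name': 'Pilot One', '@characterID': '123'}]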