def extract_hit_url(row):
    """
    Pull the question payload out of an XML encoded Question.

    ``row`` is handed to ``parse_xml``. If the parsed mapping has an
    'ExternalQuestion' entry, its 'ExternalURL' is returned; otherwise, if it
    has an 'HTMLQuestion' entry, its 'HTMLContent' is returned. When neither
    entry exists, or a KeyError is raised along the way, the original ``row``
    is returned unchanged.
    """
    # (top-level key, child key holding the payload), in lookup priority order.
    lookups = (
        ('ExternalQuestion', 'ExternalURL'),
        ('HTMLQuestion', 'HTMLContent'),
    )
    try:
        parsed = parse_xml(row)
        top_level_keys = parsed.keys()
        for question_kind, payload_key in lookups:
            if question_kind in top_level_keys:
                return parsed[question_kind][payload_key]
    except KeyError:
        # Fall through to returning the input untouched.
        pass
    return row
def dump_external_resources_from_xml(path):
    """Copy the external resources referenced by the XML file at ``path``.

    The file content is parsed with ``parse_xml`` and the resulting tree of
    dicts, lists and unicode strings is walked recursively. Every unicode leaf
    is UTF-8 encoded and passed, together with the key of the nearest
    enclosing dict entry, to ``match_fileurl`` and ``match_filename``. Each
    truthy result that does not start with ``config.gs_data_dir`` is handed to
    ``copy_external_resource``.

    Note: relies on the Python 2 names ``dict.iteritems`` and ``unicode``.
    """

    def walk(node, key=None):
        # Generator over every truthy matcher result found beneath ``node``.
        if isinstance(node, dict):
            for child_key, child in node.iteritems():
                for hit in walk(child, key=child_key):
                    yield hit
        elif isinstance(node, list):
            # List items are matched under the key the list itself was
            # reached with.
            for child in node:
                for hit in walk(child, key=key):
                    yield hit
        elif isinstance(node, unicode):
            encoded = node.encode('utf-8')
            for matcher in (match_fileurl, match_filename):
                hit = matcher(key, encoded)
                if hit:
                    yield hit

    with open(path) as fd:
        raw_xml = fd.read()

    for resource in walk(parse_xml(raw_xml)):
        # Results that already start with config.gs_data_dir are skipped.
        if resource.find(config.gs_data_dir) == 0:
            continue
        copy_external_resource(resource)
def dump_external_resources_from_xml(path):
    """Find external resources mentioned in an XML file and copy them.

    Reads ``path``, parses its content with ``parse_xml`` and recursively
    visits the parsed structure (dicts, lists, unicode strings). Unicode
    values are encoded as UTF-8 and tested with ``match_fileurl`` and then
    ``match_filename``, each called with the key of the closest enclosing
    dict entry. Any truthy match whose text does not begin with
    ``config.gs_data_dir`` is passed to ``copy_external_resource``.

    Note: uses ``dict.iteritems`` and ``unicode``, i.e. Python 2 names.
    """
    matchers = (match_fileurl, match_filename)

    def iter_matches(subtree, key=None):
        # Leaves first: only unicode strings are ever offered to the matchers.
        if isinstance(subtree, unicode):
            utf8_text = subtree.encode('utf-8')
            for match in matchers:
                result = match(key, utf8_text)
                if result:
                    yield result
            return

        if isinstance(subtree, dict):
            # Each value is visited under its own key.
            for name, value in subtree.iteritems():
                for result in iter_matches(value, key=name):
                    yield result
        elif isinstance(subtree, list):
            # Elements keep the key that led to the list.
            for element in subtree:
                for result in iter_matches(element, key=key):
                    yield result

    with open(path) as fd:
        content = fd.read()
    parsed = parse_xml(content)

    for external in iter_matches(parsed):
        starts_with_data_dir = external.find(config.gs_data_dir) == 0
        if not starts_with_data_dir:
            copy_external_resource(external)
def __call__(self, *credential, **payload):
    """Perform the RPC call while following CCP's caching guidelines.

    Parameters:
        *credential: at most one positional object; when given, its ``key``
            and ``code`` attributes are sent as ``keyID`` and ``vCode``.
        **payload: the remaining request arguments, posted as form data.

    Returns:
        The cached result wrapped in ``XMLBunch`` when an unexpired cache
        entry exists for this name/key/argument hash; otherwise the freshly
        fetched ``result`` element. When the result has a ``rowset``, it is
        replaced by a mapping of each rowset's ``@name`` to a list of its
        rows. A rowset without any ``row`` entry maps to an empty list.

    Raises:
        Exception: if more than one positional argument is passed, or if the
            first line of the response is not the expected XML declaration.
        Whatever ``response.raise_for_status()`` raises for a failed request.
    """
    if len(credential) > 1:
        raise Exception("The only positional parameter allowed is the credentials object.")

    now = datetime.utcnow()
    uri = self.uri(self.name)

    # Define the keyID/vCode API key arguments, if we have credentials.
    if credential:
        payload['keyID'] = credential[0].key
        payload['vCode'] = credential[0].code

    # Hash the arguments in a reliable way by converting to text in a way which sorts the keys.
    payload_hash = sha256(EnhancedBencode().encode(payload)).hexdigest()

    # Examine the cache.
    cache = CachedAPIValue.objects(
        key=payload.get('keyID', None),
        name=self.name,
        arguments=payload_hash,
        expires__gte=now
    ).first()

    if cache:
        return XMLBunch(cache.result)

    # Actually perform the query if a cached version could not be found.
    response = requests.post(uri, data=payload or None)
    response.raise_for_status()

    # We don't want the initial XML prefix.  We should still check it, though.
    prefix, _, data = response.text.partition('\n')
    if prefix.strip() != "<?xml version='1.0' encoding='UTF-8'?>":
        raise Exception("Data returned doesn't seem to be XML!")

    data = XMLBunch(parse_xml(data.strip())).eveapi
    result = data.result

    if 'rowset' in result:
        # A single rowset arrives as one mapping, several as a list; treat
        # both uniformly.
        raw_rowsets = result.rowset if isinstance(result.rowset, list) else [result.rowset]
        restruct = XMLBunch()

        for rowset in (XMLBunch(i) for i in raw_rowsets):
            # An empty rowset has no 'row' entry at all; report it as an
            # empty list rather than failing on the missing attribute.
            # NOTE(review): assumes XMLBunch supports ``in`` like a dict, as
            # the ``'rowset' in result`` test above already does.
            if 'row' in rowset:
                rows = rowset.row if isinstance(rowset.row, list) else [rowset.row]
            else:
                rows = []
            restruct[rowset['@name']] = [XMLBunch(i) for i in rows]

        result.rowset = restruct

    # Upsert (update if exists, create if it doesn't) the cache value.
    CachedAPIValue.objects(
        key=payload.get('keyID', None),
        name=self.name,
        arguments=payload_hash
    ).update_one(
        upsert=True,
        set__expires=datetime.strptime(data.cachedUntil, "%Y-%m-%d %H:%M:%S"),
        set__result=result
    )

    return result
def __call__(self, *credential, **payload):
    """Perform the RPC call while following CCP's caching guidelines.

    Parameters:
        *credential: at most one positional object; when given, its ``key``
            and ``code`` attributes are sent as ``keyID`` and ``vCode``.
        **payload: the remaining request arguments, posted as form data.

    Returns:
        The cached result wrapped in ``XMLBunch`` when an unexpired cache
        entry exists for this name/key/argument hash; otherwise the freshly
        fetched ``result`` element. A ``rowset`` in the result is replaced by
        a mapping of each rowset's ``@name`` to a list of its rows when it is
        either a list of rowsets or a single rowset that has rows. A single
        rowset without any ``row`` entry is left as parsed.

    Raises:
        Exception: if more than one positional argument is passed, or if the
            first line of the response is not the expected XML declaration.
        Whatever ``response.raise_for_status()`` raises for a failed request.
    """
    if len(credential) > 1:
        raise Exception(
            "The only positional parameter allowed is the credentials object."
        )

    now = datetime.utcnow()
    uri = self.uri(self.name)

    # Define the keyID/vCode API key arguments, if we have credentials.
    if credential:
        payload['keyID'] = credential[0].key
        payload['vCode'] = credential[0].code

    # Hash the arguments in a reliable way by converting to text in a way which sorts the keys.
    payload_hash = sha256(EnhancedBencode().encode(payload)).hexdigest()

    # Examine the cache.
    cache = CachedAPIValue.objects(key=payload.get('keyID', None),
                                   name=self.name,
                                   arguments=payload_hash,
                                   expires__gte=now).first()

    if cache:
        return XMLBunch(cache.result)

    # Actually perform the query if a cached version could not be found.
    response = requests.post(uri, data=payload or None)
    response.raise_for_status()

    # We don't want the initial XML prefix.  We should still check it, though.
    prefix, _, data = response.text.partition('\n')
    if prefix.strip() != "<?xml version='1.0' encoding='UTF-8'?>":
        raise Exception("Data returned doesn't seem to be XML!")

    data = XMLBunch(parse_xml(data.strip())).eveapi
    result = data.result

    if 'rowset' in result:
        raw = result.rowset
        several = isinstance(raw, list)

        # The 'row' membership test only makes sense on a single rowset
        # mapping; applied to a list of rowsets it is a list-membership test
        # and would skip the restructuring entirely. A list is therefore
        # always restructured, a single rowset only when it has rows.
        if several or 'row' in raw:
            restruct = XMLBunch()

            for rowset in (XMLBunch(i) for i in (raw if several else [raw])):
                # Within a list, individual rowsets may still be empty.
                # NOTE(review): assumes XMLBunch supports ``in`` like a dict,
                # as the ``'rowset' in result`` test above already does.
                if 'row' in rowset:
                    rows = rowset.row if isinstance(
                        rowset.row, list) else [rowset.row]
                else:
                    rows = []
                restruct[rowset['@name']] = [XMLBunch(i) for i in rows]

            result.rowset = restruct

    # Upsert (update if exists, create if it doesn't) the cache value.
    CachedAPIValue.objects(key=payload.get('keyID', None),
                           name=self.name,
                           arguments=payload_hash).update_one(
                               upsert=True,
                               set__expires=datetime.strptime(
                                   data.cachedUntil, "%Y-%m-%d %H:%M:%S"),
                               set__result=result)

    return result