def extract_http(request_text, dict_filed):
    """Parse the head of a raw HTTP message and record fields as tags.

    The first line is split off and the remaining header block is parsed
    with ``Message(StringIO(...))``.  If the first token of the first line
    is a known HTTP method the message is treated as a request and the
    ``action``, ``Domain`` and ``URL`` tags are set; otherwise it is treated
    as a response and the second token is stored as ``result_code``.
    Selected optional headers are then copied into the tags.

    Args:
        request_text: Raw HTTP message text; the first line and the headers
            must be separated by CRLF.
        dict_filed: Mapping with a ``'tags'`` dict, which is updated in place.

    Raises:
        ValueError: If ``request_text`` contains no CRLF.
        IndexError: If the first line has fewer than two space-separated
            tokens.
        KeyError: If ``dict_filed`` has no ``'tags'`` entry.  A request
            without a Host header presumably also ends up here, since the
            rfc822-style ``Message`` raises on a missing header -- confirm
            against the ``Message`` class actually imported by the file.
    """
    # OPTIONS and PATCH are included so that such requests are not
    # misclassified as responses (which would store the request target as
    # the result code).
    request_methods = {
        'GET', 'HEAD', 'POST', 'PUT', 'DELETE',
        'TRACE', 'CONNECT', 'OPTIONS', 'PATCH',
    }
    # (header name as parsed, tag key written).  Order matters: 'accept' is
    # applied after 'content-type' and overwrites the same tag.
    # NOTE(review): storing Accept under the 'content-type' tag looks like a
    # copy/paste slip, but it is kept because consumers may rely on it.
    optional_headers = (
        ('user-agent', 'user_agent'),
        ('referer', 'referer'),
        ('content-type', 'content-type'),
        ('accept', 'content-type'),
    )

    request_line, headers_alone = request_text.split('\r\n', 1)
    headers = Message(StringIO(headers_alone))
    request_line = request_line.split(' ', 2)

    if request_line[0] in request_methods:
        dict_filed['tags']['action'] = request_line[0]
        dict_filed['tags']['Domain'] = headers['host']
        dict_filed['tags']['URL'] = headers['host'] + request_line[1]
    else:
        dict_filed['tags']['result_code'] = request_line[1]

    for header_name, tag_key in optional_headers:
        # Header lookup on the message object is case-insensitive.  The
        # header is spelled "User-Agent" on the wire, so it must be looked
        # up with a hyphen; the tag key keeps its original underscore.
        if header_name in headers:
            dict_filed['tags'][tag_key] = headers[header_name]
def __call__(self, environ, start_response):
    """WSGI entry point: replay a recorded response for the request.

    Generator that calls ``start_response`` once and yields the body.

    Flow, as visible in this method:
      1. A trailing '.' is stripped from ``HTTP_HOST``.
      2. GET requests whose URI starts with '/__GARBAGE__' get a random
         text body of the requested size (in KiB) and nothing else runs.
      3. For other GET requests the push cache is searched for an asset on
         the same host whose path matches the request URI.
      4. If nothing was cached, the program named by
         ``env['REPLAYSERVER_FN']`` is run with a reduced environment and
         its stdout is taken as the raw HTTP response.
      5. The raw response is split into status, headers and body; some
         headers are dropped and a CORS header is added if missing.
      6. Optional 'x-extrapush' and 'link' (preload) headers are attached
         when the request matches a configured trigger path.
      7. The body is yielded, de-chunked first if the recorded response
         used chunked transfer encoding.

    NOTE(review): the indentation of this method was reconstructed from a
    whitespace-collapsed source; confirm the nesting (in particular the
    level of the two ``break`` statements) against the original file.

    Args:
        environ: WSGI environment.  Besides standard keys this method reads
            'REQUEST_URI', 'WORKING_DIR', 'RECORDING_DIR' and
            'REPLAYSERVER_FN'.
        start_response: WSGI ``start_response`` callable.

    Yields:
        The response body, as one string or as decoded chunks.
    """
    #print "call! " + environ['REQUEST_URI']
    #print environ
    hdrlist = []  # re-initialised below before headers are collected
    env = dict(environ)  # work on a copy so the caller's environ is untouched
    # Drop the trailing dot of a fully-qualified host name.
    if env['HTTP_HOST'].endswith('.'):
        env['HTTP_HOST'] = env['HTTP_HOST'][:-1]
    #FastPath for Push Garbage:
    # URI form is '/__GARBAGE__<kb>...'; the third '__'-separated field is
    # the body size in KiB.
    if env['REQUEST_METHOD'] == "GET" and env['REQUEST_URI'].startswith('/__GARBAGE__'):
        amount_kb = int(env['REQUEST_URI'].split('__')[2])
        N = amount_kb * 1024
        response = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(N))
        start_response('200 OK', [('Content-Type', 'text/plain')])
        yield response
    else:
        cached_response = None
        is_push = False
        if env['REQUEST_METHOD'] == "GET":
            # Look for the requested resource among the configured push
            # assets of this host.  The loops do not stop at the first
            # match, so the last matching asset wins.
            for u,host in enumerate(self.push_host):
                if host == env['HTTP_HOST']:
                    for v,push_resource in enumerate(self.push_assets[u]):
                        parsed = urlparse(push_resource)
                        # Remove the 'https://<netloc>' prefix so that only
                        # the path part is compared with REQUEST_URI.
                        stripped = push_resource[len('https://'+parsed.netloc):]
                        push_resource = stripped
                        if push_resource == env['REQUEST_URI']:
                            #print "pushing from cache..."
                            is_push = True
                            cached_response = self.push_cache[u][v]
        if cached_response is None:
            # print environ['REQUEST_URI'], "Not cached!"
            # Nothing cached: ask the external replay server program for
            # the recorded response.
            passed_env = dict()
            # remap for compat with replayserver
            passed_env['MAHIMAHI_CHDIR'] = env['WORKING_DIR']
            passed_env['MAHIMAHI_RECORD_PATH'] = env['RECORDING_DIR']
            passed_env['REQUEST_METHOD'] = env['REQUEST_METHOD']
            passed_env['REQUEST_URI'] = env['REQUEST_URI']
            # env['SERVER_PROTOCOL'], is currently a hack to find the corresponding
            # h1 traces
            passed_env['SERVER_PROTOCOL'] = "HTTP/1.1"
            passed_env['HTTP_HOST'] = env['HTTP_HOST']
            #if 'HTTP_USER_AGENT' in env.keys():
            #    passed_env['HTTP_USER_AGENT'] = env['HTTP_USER_AGENT']
            if env['wsgi.url_scheme'] == 'https':
                passed_env['HTTPS'] = "1"
            # shell=True,
            p = subprocess.Popen(
                [env['REPLAYSERVER_FN']], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=passed_env)
            # stdout is the raw HTTP response; stderr is captured but not
            # used anywhere in this method.
            (cached_response, replay_stderr) = p.communicate()
        # Split the raw response into header block and body, then the
        # header block into status line and header lines.
        response_header, response_body = cached_response.split('\r\n\r\n', 1)
        status_line, headers_alone = response_header.split('\r\n', 1)
        splitted_status = status_line.split(' ')
        # response_code = status_line[1]
        # Drop the first token of the status line (the protocol version);
        # what remains is '<code> <reason>' as start_response expects.
        status_cleaned = ' '.join(splitted_status[1:])
        headers = Message(StringIO(headers_alone))
        #print env['REQUEST_METHOD'], env['REQUEST_URI']
        is_chunked = False
        corsfound = False
        hdrlist = []
        for key in headers.keys():
            if key == "transfer-encoding" and 'chunked' in headers[key]:
                # The body is de-chunked below, so this header is not
                # forwarded.
                is_chunked = True
            else:
                # 'expires', 'date' and 'last-modified' are not forwarded.
                if key not in ['expires', 'date', 'last-modified']:
                    hdrlist.append((key.strip(), headers[key]))
            if key.lower() == 'access-control-allow-origin':
                corsfound = True
        if not corsfound:
            # this is required for font hinting...
            # note that we will not overwrite cors headers b/c
            # in some xhr situations * is not sufficient
            hdrlist.append(('Access-Control-Allow-Origin','*'))
        # NOTE(review): passed_env is only assigned in the "not cached"
        # branch above.  The two blocks below are guarded by `not is_push`;
        # this relies on cached_response being None whenever is_push is
        # False -- confirm.
        if not is_push and env['SERVER_PROTOCOL'] == "HTTP/2":
            # Announce the assets to push via an 'x-extrapush' header when
            # this request is the configured trigger for its host.
            for i, push_host_strategy in enumerate(self.push_host):
                if passed_env['HTTP_HOST'] == push_host_strategy:
                    if passed_env['REQUEST_URI'] == self.push_trigger_path[i]:
                        linkstr = ''
                        # TODO (bewo): is there any limitation?
                        for asset in self.push_assets[i]:
                            if linkstr != '':
                                linkstr += ','
                            # print asset;
                            linkstr += '<' + asset + '>; rel=preload'
                        hdrlist.append(('x-extrapush', str(linkstr)))
                        print 'WILL PUSH: ' ,len(self.push_assets[i]) #//, ('x-extrapush', str(linkstr))
                        break
        if not is_push:
            # Same idea for resource hints: emit one 'link' header listing
            # every hinted asset of the matching trigger.
            for i, hint_host_strategy in enumerate(self.hint_host):
                if passed_env['HTTP_HOST'] == hint_host_strategy:
                    if passed_env['REQUEST_URI'] == self.hint_trigger_path[i]:
                        linkstr = ''
                        for j, asset in enumerate(self.hint_assets[i]):
                            if linkstr != '':
                                linkstr += ','
                            as_string = self.hint_as_string[i][j]
                            if as_string != '':
                                as_string='; as='+as_string+''
                            # Fonts additionally get the 'crossorigin'
                            # attribute.
                            if self.hint_as_string[i][j] == "font":
                                as_string += ";crossorigin"
                            linkstr_to_append = '<'+asset + '>; rel=preload'+as_string+';type="'+self.hint_mimetype[i][j]+'"'
                            linkstr += linkstr_to_append
                        hdrlist.append(('link', str(linkstr)))
                        print 'WILL HINT: ' ,len(self.hint_assets[i]) #//, ('x-extrapush', str(linkstr))
                        break
        # print "start response! " + environ['REQUEST_URI']
        if is_chunked:
            # print "will decode chunked"
            start_response(status_cleaned, hdrlist)
            # decode() is defined outside this block; presumably it yields
            # the de-chunked pieces of the body -- confirm.
            for chunk in decode(StringIO(response_body)):
                yield str(chunk)
        else:
            start_response(status_cleaned, hdrlist)
            yield response_body
# method 1, using mimetools # mimetools is deprecated and is not included in Python 3 # therefore, this method is stupid. DONT USE DEPRECATED SHIT. # Message: subclass of rfc822.Message with added methods # available methods in both super- and sub-class # choose_boundary() -> no idea, docs are cryptic # decode() -> process MIME-encoded data. Encoding types base64, 8bit, etc # encode() -> MIME encode, same encoding types as above # copyliteral() -> read until EOF, write somewhere # copybinary() -> same, but in blocks of 8192 bytes # additional methods for this sub-class # getplist() -> process parameter list of Content-Type header, list of strings. # getparam() -> read from plist. <> is stripped # getencoding() -> return Content-Transfer-Encoding value. defaults to '7bit' if not found # gettype() -> return message type from Content-Type # getmaintype() -> basically the same. defaults to return 'text' # getsubtype() -> basically the same. defaults to return 'plain' # for example, Content-Type of 'text/plain', main is 'text' sub is 'plain' from mimetools import Message from StringIO import StringIO request_line, headers_alone = request_text.split('\r\n', 1) headers = Message(StringIO(headers_alone)) print len(headers) # -> '3' print headers.keys() # -> ['accept-charset', 'host', 'accept'] print headers['Host'] # -> 'textfiles.com' # method 2, using BaseHTTPServer # nvm, this is not the droid I am looking for.