def extract_http(request_text, dict_filed):
    """Parse the head of an HTTP message and record fields in ``dict_filed['tags']``.

    ``request_text`` is split at the first CRLF into the start line and the
    header block; the header block is parsed with ``Message``.

    * If the first token of the start line is a known HTTP method, the
      message is treated as a request: the tags ``action`` (method),
      ``Domain`` (Host header) and ``URL`` (Host header followed by the
      request target) are set.  Requests are expected to carry a Host header.
    * Otherwise the message is treated as a response and the second token of
      the start line is stored as ``result_code``.

    In both cases the ``user_agent``, ``referer``, ``content-type`` and
    ``accept`` tags are set when the corresponding header is present.

    Args:
        request_text: raw message head; must contain at least one CRLF.
        dict_filed: dict holding a mutable mapping under the ``'tags'`` key;
            that mapping is updated in place.

    Raises:
        ValueError: if ``request_text`` contains no CRLF.
        IndexError: if the start line has fewer than two space-separated
            tokens.
    """
    http_methods = {'GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE',
                    'CONNECT', 'OPTIONS', 'PATCH'}
    # (header name as stored by Message, tag name written to the result)
    header_to_tag = (
        ('user-agent', 'user_agent'),
        ('referer', 'referer'),
        ('content-type', 'content-type'),
        ('accept', 'accept'),
    )

    request_line, headers_alone = request_text.split('\r\n', 1)
    headers = Message(StringIO(headers_alone))
    start_tokens = request_line.split(' ', 2)
    tags = dict_filed['tags']

    if start_tokens[0] in http_methods:
        tags['action'] = start_tokens[0]
        tags['Domain'] = headers['host']
        tags['URL'] = headers['host'] + start_tokens[1]
    else:
        # Not a request line, so the second token is taken as the status code.
        tags['result_code'] = start_tokens[1]

    # Header names are stored lowercased with their original hyphens, so
    # User-Agent must be looked up as 'user-agent' (not 'user_agent'), and
    # Accept gets its own tag instead of overwriting 'content-type'.
    for header_name, tag_name in header_to_tag:
        if header_name in headers:
            tags[tag_name] = headers[header_name]
    """
Example #2
0
    def __call__(self, environ, start_response):
        """WSGI entry point, written as a generator, that serves a replayed response.

        Behaviour, in order:

        * A GET whose REQUEST_URI starts with '/__GARBAGE__' is answered with
          a random text/plain payload; the size in KiB is the third
          '__'-separated piece of the URI.
        * Otherwise, for a GET whose host and URI match an entry of
          self.push_host / self.push_assets, the raw response is taken from
          self.push_cache.
        * If no cached response was selected, the program named by
          environ['REPLAYSERVER_FN'] is run with a small, explicit
          environment and its stdout is used as the raw response.
        * The raw response is split into status line, headers and body.  The
          'expires', 'date' and 'last-modified' headers are dropped, a
          chunked transfer-encoding header is dropped (the body is passed
          through decode() instead), and 'Access-Control-Allow-Origin: *' is
          added when the response has no such header.
        * For non-pushed responses, an 'x-extrapush' header (only when
          environ['SERVER_PROTOCOL'] is "HTTP/2") and/or a 'link' preload
          header is appended when host and URI match a configured trigger.

        environ keys read: HTTP_HOST, REQUEST_METHOD, REQUEST_URI,
        SERVER_PROTOCOL, and on a cache miss also WORKING_DIR, RECORDING_DIR,
        wsgi.url_scheme and REPLAYSERVER_FN.

        Instance attributes read: push_host, push_assets, push_cache,
        push_trigger_path, hint_host, hint_trigger_path, hint_assets,
        hint_as_string, hint_mimetype.

        Because this is a generator, start_response is only called once the
        server starts iterating over the returned object.
        """
        #print "call! " + environ['REQUEST_URI']
        #print environ
        hdrlist = []
        # Work on a copy so the caller's environ is not modified.
        env = dict(environ)
        # Strip a single trailing '.' from the Host value.
        if env['HTTP_HOST'].endswith('.'):
           env['HTTP_HOST'] = env['HTTP_HOST'][:-1]


        #FastPath for Push Garbage:
        if env['REQUEST_METHOD'] == "GET" and env['REQUEST_URI'].startswith('/__GARBAGE__'):
            # e.g. '/__GARBAGE__12' splits on '__' into ['/', 'GARBAGE', '12'].
            amount_kb = int(env['REQUEST_URI'].split('__')[2])
            N = amount_kb * 1024
            # N random characters drawn from uppercase ASCII letters and digits.
            response = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(N))
            start_response('200 OK', [('Content-Type', 'text/plain')])
            yield response
        else:

            cached_response = None
            is_push = False
            # Look for a pre-cached copy: only GETs for a configured push
            # host whose URI equals one of that host's push assets qualify.
            if env['REQUEST_METHOD'] == "GET":
                for u,host in enumerate(self.push_host):
                    if host == env['HTTP_HOST']:
                      for v,push_resource in enumerate(self.push_assets[u]):
                            parsed = urlparse(push_resource)
                            # Reduce the asset URL to the part after the host.
                            # NOTE(review): the prefix length is computed with a
                            # literal 'https://', so this looks like it expects
                            # https asset URLs - confirm.
                            stripped = push_resource[len('https://'+parsed.netloc):]
                            push_resource = stripped
                            if push_resource == env['REQUEST_URI']:
                                #print "pushing from cache..."
                                is_push = True
                                cached_response = self.push_cache[u][v]

            if cached_response is None:
                # print environ['REQUEST_URI'], "Not cached!"
                # Cache miss: run the replay program with only the variables
                # set below in its environment.
                passed_env = dict()

                # remap for compat with replayserver
                passed_env['MAHIMAHI_CHDIR'] = env['WORKING_DIR']
                passed_env['MAHIMAHI_RECORD_PATH'] = env['RECORDING_DIR']
                passed_env['REQUEST_METHOD'] = env['REQUEST_METHOD']
                passed_env['REQUEST_URI'] = env['REQUEST_URI']

                # env['SERVER_PROTOCOL'], is currently a hack to find the corresponding
                # h1 traces
                passed_env['SERVER_PROTOCOL'] = "HTTP/1.1"
                passed_env['HTTP_HOST'] = env['HTTP_HOST']

                #if 'HTTP_USER_AGENT' in env.keys():
                #    passed_env['HTTP_USER_AGENT'] = env['HTTP_USER_AGENT']

                if env['wsgi.url_scheme'] == 'https':
                    passed_env['HTTPS'] = "1"

                # shell=True,
                p = subprocess.Popen(
                    [env['REPLAYSERVER_FN']], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=passed_env)

                # stdout is the raw HTTP response; stderr is captured but not
                # used further in this method.
                (cached_response, replay_stderr) = p.communicate()


            # Split the raw response into header section and body at the first
            # empty line, then the header section into status line and headers.
            response_header, response_body = cached_response.split('\r\n\r\n', 1)

            status_line, headers_alone = response_header.split('\r\n', 1)
            splitted_status = status_line.split(' ')

            # response_code = status_line[1]

            # Everything after the first token of the status line (the
            # protocol version) is passed to start_response as the status.
            status_cleaned = ' '.join(splitted_status[1:])

            headers = Message(StringIO(headers_alone))

            #print env['REQUEST_METHOD'], env['REQUEST_URI']
           
            is_chunked = False
            corsfound = False
            hdrlist = []

            # Build the outgoing header list: a chunked transfer-encoding
            # header is not forwarded, and neither are expires / date /
            # last-modified.
            for key in headers.keys():
                if key == "transfer-encoding" and 'chunked' in headers[key]:
                    is_chunked = True
                else:
                    if key not in ['expires', 'date', 'last-modified']:
                        hdrlist.append((key.strip(), headers[key]))
                    if key.lower() == 'access-control-allow-origin':
                        corsfound = True
            
            if not corsfound: 
                # this is required for font hinting...
                # note that we will not overwrite cors headers b/c
                # in some xhr situations * is not sufficient
                hdrlist.append(('Access-Control-Allow-Origin','*'))

            
            # passed_env is only assigned in the cache-miss branch above.
            # is_push is False exactly when no cached entry was selected, so
            # that branch has run whenever the two blocks below use passed_env.
            if not is_push and env['SERVER_PROTOCOL'] == "HTTP/2":
                for i, push_host_strategy in enumerate(self.push_host):
                    if passed_env['HTTP_HOST'] == push_host_strategy:
                        if passed_env['REQUEST_URI'] == self.push_trigger_path[i]:
                            # Comma-separated '<asset>; rel=preload' entries,
                            # one per push asset of this host.
                            linkstr = ''
                            # TODO (bewo): is there any limitation?
                            for asset in self.push_assets[i]:
                                if linkstr != '':
                                     linkstr += ','
                                # print asset;
                                linkstr += '<' + asset + '>; rel=preload'
                            hdrlist.append(('x-extrapush', str(linkstr)))
                            print 'WILL PUSH: ' ,len(self.push_assets[i]) #//, ('x-extrapush', str(linkstr))
                            break

            if not is_push:
                    for i, hint_host_strategy in enumerate(self.hint_host):
                        if passed_env['HTTP_HOST'] == hint_host_strategy:
                            if passed_env['REQUEST_URI'] == self.hint_trigger_path[i]:
                                # Comma-separated preload entries of the form
                                # '<asset>; rel=preload[; as=X][;crossorigin];type="MIME"'.
                                linkstr = ''
                                for j, asset in enumerate(self.hint_assets[i]):
                                    if linkstr != '':
                                       linkstr += ','
                                    as_string = self.hint_as_string[i][j]
                                    if as_string != '':
                                       as_string='; as='+as_string+''
                                    # Assets hinted as fonts additionally get ';crossorigin'.
                                    if self.hint_as_string[i][j] == "font":
                                       as_string += ";crossorigin"
                                    linkstr_to_append = '<'+asset + '>; rel=preload'+as_string+';type="'+self.hint_mimetype[i][j]+'"'
                                    linkstr += linkstr_to_append
                                hdrlist.append(('link', str(linkstr)))
                                print 'WILL HINT: ' ,len(self.hint_assets[i]) #//, ('x-extrapush', str(linkstr))
                                break

            # print "start response! " + environ['REQUEST_URI']

            if is_chunked:
                # print "will decode chunked"
                start_response(status_cleaned, hdrlist)
                # decode() is defined outside this block; presumably it yields
                # the de-chunked pieces of the body - confirm.
                for chunk in decode(StringIO(response_body)):
                    yield str(chunk)
            else:
                start_response(status_cleaned, hdrlist)
                yield response_body
# method 1, using mimetools
# mimetools has been deprecated since Python 2.3 (in favour of the email
# package) and is not included in Python 3, so avoid this method in new code.
# Message: subclass of rfc822.Message with added methods
# module-level helper functions provided by mimetools
#    choose_boundary() -> return a unique string likely to be usable as a MIME part boundary
#    decode() -> process MIME-encoded data. Encoding types base64, 8bit, etc
#    encode() -> MIME encode, same encoding types as above
#    copyliteral() -> read until EOF, write somewhere
#    copybinary() -> same, but in blocks of 8192 bytes
# additional methods for this sub-class
#    getplist() -> process parameter list of Content-Type header, list of strings. 
#    getparam() -> read from plist. <> is stripped
#    getencoding() -> return Content-Transfer-Encoding value. defaults to '7bit' if not found
#    gettype() -> return message type from Content-Type
#    getmaintype() -> basically the same. defaults to return 'text'
#    getsubtype() -> basically the same. defaults to return 'plain'
# for example, Content-Type of 'text/plain', main is 'text' sub is 'plain'
from mimetools import Message
from StringIO import StringIO
# Cut the start line off at the first CRLF and hand the remaining header
# block to Message for parsing.
request_line, headers_alone = request_text.split('\r\n', 1)
headers = Message(StringIO(headers_alone))

# Sample output for a request that carries three headers; keys() reports the
# names lowercased, while item lookup also accepts the original spelling.
print(len(headers))     # -> '3'
print(headers.keys())   # -> ['accept-charset', 'host', 'accept']
print(headers['Host'])  # -> 'textfiles.com'


# method 2, using BaseHTTPServer
# nvm, this is not the droid I am looking for.