Example #1
def option_response(response):
    # Build a CORS preflight (OPTIONS) response from a recorded request/response pair.
    option = http_record_pb2.RequestResponse()
    option.CopyFrom(response)
    option.response.ClearField('body')
    line = option.request.first_line.decode('utf-8').split(' ')
    line = ['OPTIONS', line[1], line[2]]
    option.request.first_line = ' '.join(line).encode('utf-8')
    option.response.ClearField('header')
    option.response.first_line = b'HTTP/1.1 200 OK'
    header = option.response.header.add()
    header.CopyFrom(HTTPHeader(b'Allow', b'GET, POST, HEAD, OPTIONS, PUT'))
    header = option.response.header.add()
    header.CopyFrom(HTTPHeader(b'Content-Length', b'0'))
    return option
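The HTTPHeader helper used above is not shown in this example. A minimal sketch of what it could look like, assuming the generated http_record_pb2 module defines a header message with bytes `key` and `value` fields (the message name HTTPHeader is an assumption, not something the example confirms):

def HTTPHeader(key, value):
    # Sketch only: http_record_pb2.HTTPHeader is an assumed message name.
    # Builds a header entry from raw key/value bytes, matching how header
    # fields are populated elsewhere in these examples.
    header = http_record_pb2.HTTPHeader()
    header.key = key
    header.value = value
    return header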
Example #2
def init_ip():
    # Load each recorded IP's delay from traffic.txt (tab-separated: ip<TAB>delay).
    with open(join(repo, 'traffic.txt'), 'r') as f:
        traffic = [line.strip() for line in f]
    while traffic and traffic[-1] == '':
        del traffic[-1]
    for t in traffic:
        t = t.split('\t')
        ip_delays[t[0]] = float(t[1])
    for save in save_list:
        with open(join(repo, save), 'rb') as f:
            data = f.read()
        response = http_record_pb2.RequestResponse()
        response.ParseFromString(data)
        ips[response.ip] = 1
        hosts_ips[get_host(response.request.header)] = [response.ip]
    for host, ip in hosts_ips.items():
        # Reserve two extra unused IPs per host: index 1 serves the cacheable
        # variant, index 2 the uncacheable one (see how main() picks them).
        hosts_ips[host].append(find_emptyip(ip[0]))
        hosts_ips[host].append(find_emptyip(ip[0]))
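get_host is referenced here and in the next example but not defined. A plausible sketch, under the assumption that it simply returns the value of the recorded request's Host header, or None when the header is absent (consistent with the `if host is None` check in Example #3):

def get_host(headers):
    # Sketch only: scans the repeated header field for the Host header.
    # Assumes bytes `key`/`value` fields, as used throughout these examples.
    for header in headers:
        if header.key.lower() == b'host':
            return header.value.decode('utf-8')
    return None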
Example #3
def main():
    init_ip()
    init_ttfb()
    for save in save_list:
        f = open(join(repo, save), 'rb').read()
        response = http_record_pb2.RequestResponse()
        response.ParseFromString(f)

        host = get_host(response.request.header)
        # print('{}: {}\n{}\n{}\n\n'.format(host, response.ip, response.request.first_line, response.response.first_line))
        # if response.request.first_line.decode().split(' ')[1].find('async_sso') != -1:
        #     for header in response.request.header:
        #         print('{}: {}'.format(header.key, header.value))
        # continue
        if host is None:
            return

        # If homepage, only delete the headers
        if response.request.first_line.decode('utf-8').split(' ')[1] == '/':
            i = 0
            while i < len(response.response.header):
                beginX = response.response.header[i].key.lower().decode('utf-8').find('x-')
                beginC = response.response.header[i].key.lower().decode('utf-8').find('content-security-policy')
                beginA = response.response.header[i].key.lower().decode('utf-8').find('credentials')
                if beginX == 0 or beginC == 0 or beginA != -1:
                    del response.response.header[i]
                    i -= 1
                i += 1
            newheader = response.response.header.add()
            newheader.key = b'Access-Control-Allow-Origin'
            newheader.value = b'*'
            with open(join(repo, save), 'wb+') as f0:
                f0.write(response.SerializeToString())
            continue

        redirect = http_record_pb2.RequestResponse()
        origin = http_record_pb2.RequestResponse()
        # option = http_record_pb2.RequestResponse()
        redirect.CopyFrom(response)
        origin.CopyFrom(response)


        # Edit first line to 307 Temporary Redirect; clear other response fields
        first_line = redirect.response.first_line.decode('utf-8').split(' ')
        first_line = [first_line[0], '307', 'Temporary Redirect']
        redirect.response.first_line = ' '.join(first_line).encode('utf-8')
        redirect.response.ClearField('body')
        redirect.response.ClearField('header')
        newheader = redirect.response.header.add()
        newheader.key = b'Access-Control-Allow-Origin'
        newheader.value = b'*'

        # determine whether cacheable
        cacheable = if_cacheable(response.response.header) \
                    and response.scheme == http_record_pb2.RequestResponse.HTTP

        # Setup new host's delay and ip
        new_host = modify_location(host, cacheable)
        origin.ip = hosts_ips[host][1] if cacheable else hosts_ips[host][2]
        ip_delays[origin.ip] = proxy_delay if cacheable else ip_delays[response.ip]

        # Update hosts ttfb
        uri = urlparse(response.request.first_line.decode('utf-8').split(' ')[1]).path
        if new_host not in url_ttfb:
            url_ttfb[new_host] = {}
        if host in url_ttfb and uri in url_ttfb[host]:
            url_ttfb[new_host][uri] = url_ttfb[host][uri]
            if cacheable:
                url_ttfb[new_host][uri] -= ip_delays[hosts_ips[host][0]]*1000
            del url_ttfb[host][uri]

        # Change host to cacheable/uncacheable
        for i in range(len(origin.request.header)):
            if origin.request.header[i].key.lower() == b'host':
                origin.request.header[i].value = new_host.encode('utf-8')


        i = 0
        has_Cors = False
        while i < len(origin.response.header):
            beginX = origin.response.header[i].key.lower().decode('utf-8').find('x-content-security')
            beginC = origin.response.header[i].key.lower().decode('utf-8').find('content-security-policy')
            beginR = origin.response.header[i].key.lower().decode('utf-8').find('credentials')
            beginA = origin.response.header[i].key.lower().decode('utf-8').find('access-control-allow-origin')
            if beginX == 0 or beginC == 0 or beginR != -1:
                del origin.response.header[i]
                i -= 1
            elif beginA == 0:
                has_Cors = True
            i += 1
        # If no CORS settings, do it
        if not has_Cors:
            newheader = origin.response.header.add()
            newheader.key = b'Access-Control-Allow-Origin'
            newheader.value = b'*'

        # Write option request and response
        # option.CopyFrom(option_response(origin))

        # Set redirection location (full url)
        location = redirect.response.header.add()
        content_length = redirect.response.header.add()
        new_host += response.request.first_line.decode('utf-8').split(' ')[1]
        new_host = 'https://' + new_host if response.scheme == http_record_pb2.RequestResponse.HTTPS else 'http://' + new_host
        location.CopyFrom(HTTPHeader(b'Location', new_host.encode('utf-8') ) )
        content_length.CopyFrom(HTTPHeader(b'Content-Length', b'0'))

        fd2, path2 = tempfile.mkstemp('', 'save.', repo)
        # fd3, path3 = tempfile.mkstemp('', 'save.', repo)
        # print(path1 + '\n' + path2)
        # fd0 = open(join(repo, save.replace('save', 'response')), 'w+')
        # fd1 = open(join(repo, save.replace('save', 'origin')), 'w+')
        # fd2 = open(join(repo, save.replace('save', 'redirect')), 'w+')
        fd1 = open(join(repo, save), 'wb+')
        fd1.write(redirect.SerializeToString())
        os.write(fd2, origin.SerializeToString())
        # os.write(fd3, option.SerializeToString())
        fd1.close()
        os.close(fd2)
        # os.close(fd3)
        # print((list(response.response.header)[0]))
    # return
    traffic = open(join(repo, 'traffic.txt'), 'w+')
    for host in hosts_ips:
        ip_delays[hosts_ips[host][0]] = 0 # in secs
    for ip, delay in ip_delays.items():
        traffic.write('{}\t{}\n'.format(ip, delay))
    traffic.close()
    for host, uri_delay in url_ttfb.items():
        if uri_delay == {}:
            continue
        f = open(os.path.join(repo, host), 'w+')
        for uri, delay in uri_delay.items():
            f.write('{}\t{}\n'.format(uri, delay))
        f.close()
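if_cacheable is used above to decide which replacement IP and delay a host gets, but its definition is not part of this example. A rough sketch, assuming it treats a response as cacheable unless its Cache-Control header forbids caching; the real policy may consider more headers:

def if_cacheable(headers):
    # Sketch only: the actual cacheability rules used by main() are not shown.
    for header in headers:
        if header.key.lower() == b'cache-control':
            directives = header.value.decode('utf-8').lower()
            if 'no-store' in directives or 'no-cache' in directives or 'private' in directives:
                return False
    return True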
Example #4
if len(sys.argv) != 3:
    print("Usage: {} HTTP_RESPONSE_FILE OUTPUT_DIRECTORY".format(sys.argv[0]))
    sys.exit(-1)

subprocess.Popen("mkdir -p {}".format(sys.argv[2]), shell=True)

def extractUrlFromString(url):
    # Return the first run of URL-safe characters found in the string,
    # or the original string when nothing matches.
    regex = r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    urls = re.findall(regex, url)
    if not urls:
        return url
    return urls[0]
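A quick, hypothetical usage of the helper above (the sample string is made up for illustration):

# Every character of this string falls in the regex's allowed classes, so the
# first (and only) match is the whole URL, which is what gets returned.
first_url = extractUrlFromString('http://example.com/static/app.js')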

http_response = http_record_pb2.RequestResponse()

file_counter = 0

def copy(source, destination):
    subprocess.Popen("cp -r {} {}/".format(source, destination), shell=True)

def unchunk(body):
    new_body = ""
    # iterate through chunks until we hit the last chunk
    crlf_loc = body.find('\r\n')
    chunk_size = int( body[:crlf_loc], 16 )
    body = body[crlf_loc+2:]
    while( chunk_size != 0 ):
        # add chunk content to new body and remove from old body
        new_body += body[0:chunk_size]