예제 #1
0
 def __init__(self):
     """Create the transport socket and set the response fields to defaults."""
     # The socket object is supplied by the transport layer.
     self.sock = TcpSocket()
     # No header or body has been received yet.
     self.header = ''
     self.content = ''
     self.chunked = False
예제 #2
0
class Http:
    """Minimal HTTP/1.1 GET client built on the transport layer's ``TcpSocket``.

    Instance state:
      sock    -- transport-layer socket used for the whole exchange
      chunked -- initialised to False; not updated anywhere in this class
      content -- body of the most recent response (after de-chunking)
      header  -- raw header block of the most recent response
    """

    # A chunk-size line is a bare hexadecimal number (RFC 7230, section 4.1).
    _CHUNK_SIZE_PATTERN = r'^[0-9a-fA-F]+$'

    def __init__(self):
        # define sock by the method provided by transport layer
        self.sock = TcpSocket()
        self.chunked = False
        self.content = ''
        self.header = ''

    def assemble_http_header(self, path, host):
        """Return the request text for ``GET path`` addressed to *host*.

        Every line, including the request line, ends with CRLF as HTTP/1.1
        requires.  One trailing space is appended when needed so that the
        returned string always has an even length.
        """
        http_header = ''.join([
            'GET ', path, ' HTTP/1.1\r\n',
            'Host: ', host, '\r\n',
            'Connection: keep-alive\r\n',
            'Accept: text/html\r\n',
            '\r\n',
        ])

        # Pad to an even length (presumably a requirement of the transport
        # layer -- confirm against TcpSocket.send).
        if len(http_header) % 2 != 0:
            http_header += ' '

        return http_header

    def send(self, data):
        """Send *data* through the transport-layer socket."""
        self.sock.send(data)

    def receive(self):
        """Read the whole response, strip its header and return the body.

        The body is de-chunked when its first line looks like a chunk size;
        if de-chunking fails the body is kept exactly as received.  The
        result is also stored in ``self.content``.
        """
        # recv_all is provided by the transport layer
        data_recv = self.sock.recv_all()
        page = self.remove_header(data_recv)

        if self.parse_chunked(page):
            try:
                self.content = self.remove_chunk_length(page)
            except ValueError:
                # The body only looked chunked: fall back to the raw page.
                self.content = page
        else:
            self.content = page

        return self.content

    def remove_header(self, data):
        """Split *data* at the first blank line; store the header, return the body.

        When *data* contains no blank line, all of it is stored as the header
        and an empty body is returned instead of raising ``IndexError``.
        """
        header, _, body = data.partition('\r\n\r\n')
        self.header = header
        return body

    def parse_chunked(self, data):
        """Return True when the first line of *data* looks like a chunk size.

        This is a heuristic on the body alone: the first CRLF-terminated line
        must consist solely of hexadecimal digits.
        """
        first_line = data.split('\r\n', 1)[0]
        return re.match(self._CHUNK_SIZE_PATTERN, first_line) is not None

    def remove_chunk_length(self, data):
        """Decode a chunked body and return the concatenated chunk payloads.

        Raises:
            ValueError: if a chunk-size line is missing, is not hexadecimal,
                or the data ends before the terminating zero-size chunk.
        """
        content = []
        while True:
            # Separate the chunk-size line from everything that follows it.
            size_line, separator, rest_data = data.partition('\r\n')
            if not separator:
                # Truncated input: report it the same way as a bad size line
                # so that callers catching ValueError can fall back.
                raise ValueError('chunked body ended unexpectedly')
            if re.match(self._CHUNK_SIZE_PATTERN, size_line) is None:
                raise ValueError('invalid chunk size line: %r' % size_line)

            chunk_size = int(size_line, 16)
            # A zero-size chunk marks the end of the body.
            if chunk_size == 0:
                break
            content.append(rest_data[:chunk_size])
            # Skip the payload and the CRLF that terminates it.
            data = rest_data[chunk_size + 2:]

        return ''.join(content)

    def save_file(self, data, url):
        """Write *data* to a file in the current directory named after *url*.

        The file name is the last segment of the URL path, or ``index.html``
        when that segment is empty.
        """
        file_name = urlparse(url).path.split('/')[-1]
        new_file_name = file_name if file_name != '' else "index.html"

        # The context manager closes the file even if the write fails.
        with open(new_file_name, 'wb') as f:
            f.write(data)

    def grep_data(self, url):
        """Download *url* with a GET request and save the body to a local file.

        A missing ``http://`` scheme is prepended.  The connection is made to
        port 80 and is closed once the exchange finishes, whether or not it
        succeeded.
        """
        if not url.startswith('http://'):
            url = 'http://' + url

        # get the host and request target from the parsed URL
        url_obj = urlparse(url)
        host = url_obj.netloc
        path = url_obj.path or '/'
        if url_obj.query:
            # The request target must carry the query string as well.
            path += '?' + url_obj.query

        # connect is provided by the transport layer
        port = 80
        self.sock.connect(host, port)
        try:
            self.send(self.assemble_http_header(path, host))
            data_recv = self.receive()
            self.save_file(data_recv, url)
        finally:
            # close is provided by the transport layer
            self.sock.close()
예제 #3
0
from urlparse import urlparse
from time import time
# Timeout constant; presumably the number of seconds to wait for a response.
# It is not referenced in the visible part of this script -- confirm its use.
RES_TIMEOUT = 10

def form_get(path, host):
    """Build the text of an HTTP/1.1 GET request for *path* on *host*.

    The request asks for a keep-alive connection, accepts ``text/html`` and
    advertises gzip encoding; it ends with the blank line that closes the
    header section.
    """
    pieces = [
        'GET ', path, ' HTTP/1.1\r\n',
        'Host: ', host, '\r\n',
        'Connection: keep-alive\r\n',
        'Accept: text/html\r\n',
        'Accept-Encoding: gzip\r\n',
        '\r\n',
    ]
    return ''.join(pieces)

s = TcpSocket()

# Take the target from the command line, adding the scheme when it is absent.
address = sys.argv[1]
if address[:7] != 'http://':
    address = 'http://' + address
o = urlparse(address)

# Connect to the host part of the parsed address.
s.connect(o.netloc)

path = foo = ''
if o.path == '':
    path = '/'
else:
예제 #4
0
# Timeout constant; presumably the number of seconds to wait for a response.
# It is not referenced in the visible part of this script -- confirm its use.
RES_TIMEOUT = 10


def form_get(path, host):
    """Build the text of an HTTP/1.1 GET request for *path* on *host*.

    The request asks for a keep-alive connection, accepts ``text/html`` and
    advertises gzip encoding; it ends with the blank line that closes the
    header section.
    """
    pieces = [
        'GET ', path, ' HTTP/1.1\r\n',
        'Host: ', host, '\r\n',
        'Connection: keep-alive\r\n',
        'Accept: text/html\r\n',
        'Accept-Encoding: gzip\r\n',
        '\r\n',
    ]
    return ''.join(pieces)


s = TcpSocket()

# Take the target from the command line, adding the scheme when it is absent.
address = sys.argv[1]
if address[:7] != 'http://':
    address = 'http://' + address
o = urlparse(address)

# Connect to the host part of the parsed address.
s.connect(o.netloc)

path = foo = ''
if o.path == '':
    path = '/'
else:
예제 #5
0
 def __init__(self):
     """Create the transport socket and set the response fields to defaults."""
     # The socket object is supplied by the transport layer.
     self.sock = TcpSocket()
     # No header or body has been received yet.
     self.header = ''
     self.content = ''
     self.chunked = False
예제 #6
0
class Http:
    """Minimal HTTP/1.1 GET client built on the transport layer's ``TcpSocket``.

    Instance state:
      sock    -- transport-layer socket used for the whole exchange
      chunked -- initialised to False; not updated anywhere in this class
      content -- body of the most recent response (after de-chunking)
      header  -- raw header block of the most recent response
    """

    # A chunk-size line is a bare hexadecimal number (RFC 7230, section 4.1).
    _CHUNK_SIZE_PATTERN = r'^[0-9a-fA-F]+$'

    def __init__(self):
        # define sock by the method provided by transport layer
        self.sock = TcpSocket()
        self.chunked = False
        self.content = ''
        self.header = ''

    def assemble_http_header(self, path, host):
        """Return the request text for ``GET path`` addressed to *host*.

        Every line, including the request line, ends with CRLF as HTTP/1.1
        requires.  One trailing space is appended when needed so that the
        returned string always has an even length.
        """
        http_header = ''.join([
            'GET ', path, ' HTTP/1.1\r\n',
            'Host: ', host, '\r\n',
            'Connection: keep-alive\r\n',
            'Accept: text/html\r\n',
            '\r\n',
        ])

        # Pad to an even length (presumably a requirement of the transport
        # layer -- confirm against TcpSocket.send).
        if len(http_header) % 2 != 0:
            http_header += ' '

        return http_header

    def send(self, data):
        """Send *data* through the transport-layer socket."""
        self.sock.send(data)

    def receive(self):
        """Read the whole response, strip its header and return the body.

        The body is de-chunked when its first line looks like a chunk size;
        if de-chunking fails the body is kept exactly as received.  The
        result is also stored in ``self.content``.
        """
        # recv_all is provided by the transport layer
        data_recv = self.sock.recv_all()
        page = self.remove_header(data_recv)

        if self.parse_chunked(page):
            try:
                self.content = self.remove_chunk_length(page)
            except ValueError:
                # The body only looked chunked: fall back to the raw page.
                self.content = page
        else:
            self.content = page

        return self.content

    def remove_header(self, data):
        """Split *data* at the first blank line; store the header, return the body.

        When *data* contains no blank line, all of it is stored as the header
        and an empty body is returned instead of raising ``IndexError``.
        """
        header, _, body = data.partition('\r\n\r\n')
        self.header = header
        return body

    def parse_chunked(self, data):
        """Return True when the first line of *data* looks like a chunk size.

        This is a heuristic on the body alone: the first CRLF-terminated line
        must consist solely of hexadecimal digits.
        """
        first_line = data.split('\r\n', 1)[0]
        return re.match(self._CHUNK_SIZE_PATTERN, first_line) is not None

    def remove_chunk_length(self, data):
        """Decode a chunked body and return the concatenated chunk payloads.

        Raises:
            ValueError: if a chunk-size line is missing, is not hexadecimal,
                or the data ends before the terminating zero-size chunk.
        """
        content = []
        while True:
            # Separate the chunk-size line from everything that follows it.
            size_line, separator, rest_data = data.partition('\r\n')
            if not separator:
                # Truncated input: report it the same way as a bad size line
                # so that callers catching ValueError can fall back.
                raise ValueError('chunked body ended unexpectedly')
            if re.match(self._CHUNK_SIZE_PATTERN, size_line) is None:
                raise ValueError('invalid chunk size line: %r' % size_line)

            chunk_size = int(size_line, 16)
            # A zero-size chunk marks the end of the body.
            if chunk_size == 0:
                break
            content.append(rest_data[:chunk_size])
            # Skip the payload and the CRLF that terminates it.
            data = rest_data[chunk_size + 2:]

        return ''.join(content)

    def save_file(self, data, url):
        """Write *data* to a file in the current directory named after *url*.

        The file name is the last segment of the URL path, or ``index.html``
        when that segment is empty.
        """
        file_name = urlparse(url).path.split('/')[-1]
        new_file_name = file_name if file_name != '' else "index.html"

        # The context manager closes the file even if the write fails.
        with open(new_file_name, 'wb') as f:
            f.write(data)

    def grep_data(self, url):
        """Download *url* with a GET request and save the body to a local file.

        A missing ``http://`` scheme is prepended.  The connection is made to
        port 80 and is closed once the exchange finishes, whether or not it
        succeeded.
        """
        if not url.startswith('http://'):
            url = 'http://' + url

        # get the host and request target from the parsed URL
        url_obj = urlparse(url)
        host = url_obj.netloc
        path = url_obj.path or '/'
        if url_obj.query:
            # The request target must carry the query string as well.
            path += '?' + url_obj.query

        # connect is provided by the transport layer
        port = 80
        self.sock.connect(host, port)
        try:
            self.send(self.assemble_http_header(path, host))
            data_recv = self.receive()
            self.save_file(data_recv, url)
        finally:
            # close is provided by the transport layer
            self.sock.close()