This repository has been archived by the owner on May 28, 2020. It is now read-only.
/
http_client.py
151 lines (121 loc) · 5.05 KB
/
http_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# http_client.py written by Leo Ji (ljd753) and Lukas J. Gladic (ljg766)
# for EECS 340 Project 1 Winter 2019 with Professor Yan Chen
import sys
import socket
from urlparse import urlparse
# Exactly one command-line argument (the URL) is expected; anything else
# ends the program with exit status -1.
if len(sys.argv) != 2:
    sys.exit(-1)

# Keep the URL the user supplied.
enteredUrl = sys.argv[1]

# Only plain HTTP is handled, so the URL has to begin with "http://".
if not enteredUrl.startswith('http://'):
    print("Does not start with HTTP://")
    sys.exit(500)
# Main function and its helpers; together they do most of the assignment.
def _buildRequest(parsedUrl, hostName, port):
    """Return the HTTP/1.0 GET request text for an already-parsed URL.

    parsedUrl is the result of urlparse(); hostName and port are the two
    halves of the network location (port is the int 80 when the URL did not
    name one, otherwise the string that followed the colon).
    """
    # An empty path means the root document.
    target = parsedUrl.path or "/"
    # urlparse() splits ";params" and "?query" away from the path; they are
    # part of the request target and must be sent back to the server.
    if parsedUrl.params:
        target += ";" + parsedUrl.params
    if parsedUrl.query:
        target += "?" + parsedUrl.query

    # Only mention the port in the Host header when it is not the default.
    hostHeader = hostName
    if port != 80:
        hostHeader += ":" + str(port)

    return "GET " + target + " HTTP/1.0\r\nHost: " + hostHeader + "\r\n\r\n"


def _findHeader(headerLines, name):
    """Return the value of the first header called name, or None if absent.

    Header names are compared case-insensitively and the match must be at
    the start of the line, so "Content-Location" is never mistaken for
    "Location".
    """
    prefix = name.lower() + ":"
    for line in headerLines:
        if line.lower().startswith(prefix):
            return line[len(prefix):].strip()
    return None


def accessRequest(enteredUrl, counter):
    """Fetch enteredUrl over HTTP/1.0, following 301/302 redirects.

    enteredUrl is the URL to request.
    counter is the number of redirects already followed; once it exceeds 9
    the function gives up.

    Returns a tuple (response text, exit code). The exit code is 0 on
    success, the status code for a 4xx response, or 10 (with an empty
    response text) when the redirect limit was reached.

    Calls sys.exit() directly with 102 when the host/port part of the URL
    contains more than one colon, 403 when the URL scheme is https, and 101
    when the final response is not declared as text/html.
    """
    # Checks number of redirects, and returns if it is 10 or more
    if counter > 9:
        print("Reached 10 redirects, exiting")
        return ("", 10)

    exitCode = 0
    o = urlparse(enteredUrl)

    # Split "host:port"; no colon means the default port 80.
    hostTemp = o.netloc.split(":")
    if len(hostTemp) == 1:
        host = (hostTemp[0], 80)
    elif len(hostTemp) == 2:
        host = (hostTemp[0], hostTemp[1])
    else:
        # More than one colon: the URL cannot be interpreted.
        print("Entered URL and ports are nonsensical")
        sys.exit(102)

    # Secure URLs are not supported (this is reachable through a redirect).
    if o.scheme == 'https':
        print("Attempted to HTTPS")
        sys.exit(403)

    httpMsg = _buildRequest(o, host[0], host[1])

    # Send the request and read until the server closes the connection.
    # try/finally guarantees the socket is closed on every path, including
    # errors raised while sending or receiving.
    socketObj = socket.create_connection(host)
    try:
        socketObj.sendall(httpMsg)
        chunks = []
        while True:
            data = socketObj.recv(1024)
            if not data:
                break
            chunks.append(data)
    finally:
        socketObj.close()
    msgReturn = "".join(chunks)

    # The header section is everything up to the first empty line; only
    # those lines are searched for headers so body text can never match.
    headerLines = []
    for line in msgReturn.splitlines():
        if not line:
            break
        headerLines.append(line)

    # The status line looks like "HTTP/1.x NNN reason"; the three-digit code
    # sits at a fixed offset.
    firstLine = headerLines[0]
    responseCode = int(firstLine[9:12])
    headers = headerLines[1:]

    # Temporary or permanent redirect: pick up the target from Location.
    newUrl = ""
    if responseCode == 301 or responseCode == 302:
        newUrl = _findHeader(headers, "Location") or ""

    # A 4xx status becomes the exit code.
    if 400 <= responseCode < 500:
        exitCode = responseCode

    # Follow the redirect. The redirect response itself is discarded, so its
    # content type is deliberately not checked.
    if newUrl:
        print("Redirected to: " + newUrl)
        return accessRequest(newUrl, counter + 1)

    # Anything that is not declared as text/html is rejected.
    contentType = _findHeader(headers, "Content-Type")
    if contentType is None or not contentType.lower().startswith("text/html"):
        print("Returned type is not text/html")
        sys.exit(101)

    # We return the message received and exit code
    return (msgReturn, exitCode)
# Actually run the main function using the entered URL and a counter of 0
(msgReturn, exitCode) = accessRequest(enteredUrl, 0)

# Print the response exactly as it was received. It must not be passed
# through the 'string_escape' codec: that would rewrite backslash sequences
# that are legitimately part of the body (for example "\n" inside inline
# JavaScript) and raises ValueError on malformed ones such as a trailing
# backslash.
print(msgReturn)

# If the exit code was covered by any of our checks, exit using that code
if int(exitCode) > 0:
    sys.exit(exitCode)

# If we get here it means we eventually got a 200 OK and we exit with code 0
sys.exit(0)