__author__ = 'will'
import requests
import xml.etree.ElementTree as ET
import os
import sys
import time
from urllib.parse import unquote
# You can use Postman to generate this part :)
headers = {
    'content-type': "application/xml",
    'accept': "application/xml",
    'authorization': "Basic your_basic_auth_code_should_be_here",
    # The Basecamp server checks this field: set it to your name and email,
    # otherwise your requests will be rate-limited severely.
    'user-agent': "your name, your email",
    'cache-control': "no-cache",
}
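# For reference, the Basic auth value above can be generated like this
# (a sketch with hypothetical credentials; Basecamp Classic also accepts an
# API token as the username with the literal password "X"):
#   import base64
#   base64.b64encode(b"username:password").decode("ascii")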
err_list = []
succ_list = []
# Fetch one file and store it in the `path` directory.
def download_files(urls, path):
    url = urls['url']
    size = urls['size']
    fid = urls['id']
    filename = fid + '_' + unquote(url).split('/')[-1]
    filepath = os.path.join(path, filename)
    # Skip files that were already downloaded completely.
    if os.path.exists(filepath) and os.path.getsize(filepath) == size:
        succ_list.append(url)
        return
    print("Downloading... :" + filename + "\tsize: " + str(size / 1024.0) + "KB")
    # Retry up to three times, pausing between attempts.
    for _ in range(3):
        response = requests.request("GET", url, headers=headers)
        if response.status_code == 200:
            break
        time.sleep(3)
    else:
        print("ERROR", url)
        err_list.append(url)
        return
    with open(filepath, "wb") as f:
        f.write(response.content)
    succ_list.append(url)
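# Note: response.content buffers the whole attachment in memory. If the
# files are large, requests' stream=True plus response.iter_content() is
# the usual alternative; a minimal sketch:
#   with requests.get(url, headers=headers, stream=True) as r:
#       with open(filepath, "wb") as f:
#           for chunk in r.iter_content(chunk_size=8192):
#               f.write(chunk)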
# Fetch one page of attachment metadata (id, size, download URL) for a
# project; `n` is the offset into the project's attachment list.
def download_urls(project, count=0):
    url = "https://your_domain_should_be_here.basecamphq.com/projects/%s/attachments?n=%s" % (project, count)
    response = requests.request("GET", url, headers=headers)
    if response.status_code != 200:
        # One retry after a short pause, then give up on this page.
        time.sleep(3)
        response = requests.request("GET", url, headers=headers)
        if response.status_code != 200:
            print("ERROR:", project)
            return '', []
    print(response.status_code)
    tree = ET.fromstring(response.text)
    files = []
    for att in tree.findall('attachment'):
        f = {}
        f['size'] = int(att.find('byte-size').text)
        f['id'] = att.find('id').text
        f['url'] = att.find('download-url').text
        files.append(f)
    return response.text, files
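# For reference, the parser above expects each page to look roughly like
# this (element names taken from the findall/find calls; values made up):
#   <attachments>
#     <attachment>
#       <id>123456</id>
#       <byte-size>2048</byte-size>
#       <download-url>https://.../example.pdf</download-url>
#     </attachment>
#     ...
#   </attachments>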
# Your project ids and names; the script saves each project's files in its
# own folder.
projects_id = ['0000001', '180201']
projects_name = ['project_name1', 'This is example']
# Save files under this directory.
BASE_DIR = '/Volumes/WILL-U-DISK/'
if len(sys.argv) < 3:
    raise RuntimeError("usage: python download.py <start> <end>")
start = int(sys.argv[1])
end = int(sys.argv[2])
if len(projects_name) < end:
    end = len(projects_name)
print("[%d, %d)" % (start, end))
for i in range(start, end):
    path = BASE_DIR + projects_name[i]
    if not os.path.isdir(path):
        os.mkdir(path)
    finished = False
    count = 0
    err_list = []
    succ_list = []
    while not finished:
        response_t, urls = download_urls(projects_id[i], count)
        count += len(urls)
        time.sleep(1)
        if len(urls) == 0:
            finished = True
            break
        # Keep a copy of the raw attachment listing for this page.
        with open(BASE_DIR + projects_name[i] + '_' + str(count) + '.xml', 'w') as f:
            f.write(response_t)
        for u in urls:
            download_files(u, path)
    # Write per-project summaries of which URLs succeeded and which failed.
    with open(os.path.join(path, "a_succ.txt"), "w") as f:
        f.write("Successful download file count: " + str(len(succ_list)) + "\n")
        for succ in succ_list:
            f.write(succ + "\n")
    with open(os.path.join(path, "a_failed.txt"), "w") as f:
        f.write("Failed download file count: " + str(len(err_list)) + "\n")
        for err in err_list:
            f.write(err + "\n")
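# Example invocation: download projects [0, 2) from the lists above.
#   python download.py 0 2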