-
Notifications
You must be signed in to change notification settings - Fork 0
/
ftp_tests.py
143 lines (92 loc) · 2.83 KB
/
ftp_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gzip
from ftplib import FTP
from io import BytesIO, StringIO
# Fetch a small text file from the NCBI server over anonymous FTP and print it.
# The context manager shuts the control connection down when the block exits.
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
    ftp.login()
    # retrbinary() passes bytes to its callback, so the buffer has to be a
    # BytesIO; a StringIO raises TypeError on the first chunk.
    r = BytesIO()
    ftp.retrbinary('RETR /pub/README_ABOUT_BZ2_FILES', r.write)
# Decode once at the output boundary; errors='replace' keeps a stray
# non-UTF-8 byte from aborting the script.
print(r.getvalue().decode('utf-8', errors='replace'))
#ftp://ftp.ncbi.nlm.nih.gov/pubmed/id_list.txt.gz
import ftputil
# Open an ftputil session to the NCBI server as user 'anonymous' with an empty
# password, change into the PubMed sample directory and list its entries.
host = ftputil.FTPHost('ftp.ncbi.nlm.nih.gov', 'anonymous', '')
host.chdir('/pubmed/sample-2019-01-01/')
file_list = host.listdir(host.curdir)
# Listing noted in the original script: ['example.xml', 'reflist.example.xml']
def read_file(fname):
    """Yield the lines of the remote file *fname*, one at a time.

    The file is opened through the module-level ``host`` session and is
    closed once the generator is exhausted or closed.
    """
    with host.open(fname) as remote:
        yield from remote
# Echo the sample XML. Lines read from a file keep their trailing newline, so
# suppress print()'s own terminator to avoid double-spaced output.
for line in read_file("example.xml"):
    print(line, end="")
def read_gz_file(fname):
    """Return a readable ``gzip.GzipFile`` over the remote file *fname*.

    The compressed bytes are copied into memory before the remote handle is
    closed, so the returned object stays usable after this function returns.
    Wrapping the remote handle directly would hand back a reader over a file
    that the ``with`` statement has already closed.
    """
    with host.open(fname, mode="rb") as input_file:
        payload = BytesIO(input_file.read())
    return gzip.GzipFile(fileobj=payload)


# Print the first lines of the PubMed ID list, then the number of lines read.
host.chdir('/pubmed/')
i = 0
# A GzipFile is iterated line by line directly; gzip.decompress() only accepts
# bytes, so it cannot be applied to the file object returned above.
with read_gz_file("id_list.txt.gz") as compressed_xml:
    for i, line in enumerate(compressed_xml, start=1):
        print(i, line)
        if i > 10:
            break
print(i)
##----------------------------------------------------------------------------##
from ftplib import FTP
import gzip
from io import StringIO
# Download PMC-ids.csv.gz into memory and show the start of its contents.
# The buffer must be binary: retrbinary() delivers bytes and GzipFile reads
# bytes, so a StringIO fails on the first write.
sio = BytesIO()


def handle_binary(more_data):
    """Append one downloaded chunk (*more_data*, bytes) to the ``sio`` buffer."""
    sio.write(more_data)


# The context manager closes the FTP connection once the transfer is done.
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
    ftp.login()  # Username: anonymous password: anonymous@
    resp = ftp.retrbinary("RETR pub/pmc/PMC-ids.csv.gz", callback=handle_binary)
sio.seek(0)  # Go back to the start
zippy = gzip.GzipFile(fileobj=sio)
uncompressed = zippy.read()
print(uncompressed[:1000])
import urllib
from io import BytesIO
import urllib.request
# Download the gzip-compressed PubMed ID list via urllib and decompress it
# from an in-memory buffer.
url = 'ftp://ftp.ncbi.nlm.nih.gov/pubmed/id_list.txt.gz'
# Read the response exactly once and keep the raw bytes; a second read() on
# the same response only returns an empty bytes object.
with urllib.request.urlopen(url) as resp:
    respData = resp.read()
memfile = BytesIO(respData)
# A gzip stream is read directly rather than opened by member name (that is
# the ZipFile API), and an in-memory buffer must be passed as ``fileobj``
# because the first positional argument is a filename.
with gzip.GzipFile(fileobj=memfile, mode='rb') as myzip:
    content = myzip.read()  # or other file-like commands
# Closing the GzipFile leaves memfile open; rewind it before reading again.
memfile.seek(0)
zippy = gzip.GzipFile(fileobj=memfile)
uncompressed = zippy.read()
#### !!!!
# Fetch ``url``, decompress the gzip payload in memory and decode it to text.
# Both the network response and the GzipFile are closed as soon as they have
# been read, instead of being left open for the rest of the script.
with urllib.request.urlopen(url) as mysock:
    memfile = BytesIO(mysock.read())
with gzip.GzipFile(fileobj=memfile) as f:
    r = f.read()
t = r.decode('utf-8')
###
import gzip
from io import BytesIO
import shutil
from ftplib import FTP
# Download id_list.txt.gz into memory over anonymous FTP, then decompress it
# in several equivalent ways.
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:
    ftp.login('anonymous', '')
    flo = BytesIO()
    ftp.retrbinary('RETR /pubmed/id_list.txt.gz', flo.write)
flo.seek(0)
# Bind the reader to ``gz`` rather than ``gzip``: reusing the module name would
# shadow the module and break every later gzip.GzipFile(...) call.
# NOTE: the output file receives the decompressed id_list.txt despite its
# '.tar' name.
with open('archive.tar', 'wb') as fout, gzip.GzipFile(fileobj=flo) as gz:
    shutil.copyfileobj(gz, fout)
# Each pass consumes the buffer, so rewind before decompressing again.
flo.seek(0)
with gzip.GzipFile(fileobj=flo) as gz:
    x = gz.read()
flo.seek(0)
# Keep the GzipFile object itself; calling .read() here as well would leave
# bytes in ``gzipfile`` and make the following .read() fail.
gzipfile = gzip.GzipFile(mode='rb', fileobj=flo)
text = gzipfile.read()
# No ``response`` object exists in this snippet; reuse the bytes already
# downloaded into ``flo``.
buf = BytesIO(flo.getvalue())
f = gzip.GzipFile(fileobj=buf)
r = f.read()
###
import urllib
import io
from zipfile import ZipFile
# Download a zip archive into memory and read one member from it.
# urlopen lives in urllib.request on Python 3; importing it here keeps this
# snippet self-contained.
import urllib.request

with urllib.request.urlopen('ftp://ftp.yourhost.com/spam.zip') as mysock:  # check urllib for parameters
    memfile = io.BytesIO(mysock.read())
with ZipFile(memfile, 'r') as myzip:
    # Open the member in a with-block so its handle is released as well.
    with myzip.open('eggs.txt') as f:
        content = f.read()  # or other file-like commands