def get_all_psps(basedir, exchange, kind):
    """Download the full set of ABINIT pseudopotential files (Z = 1..112).

    Files are stored in ``basedir/<exchange>_<kind>``; files that already
    exist locally are skipped.  Zero-length downloads (element has no psp
    on the server) are removed.  On an FTP error the partial file is
    dropped and the connection is re-established before moving on.

    Parameters
    ----------
    basedir : str
        Local directory under which the psp directory is created.
    exchange, kind : str
        Passed to ``psp_name`` / ``get_ftp_psp`` to build filenames/paths.
    """
    from ftplib import all_errors as _ftp_errors

    directory = basedir + '/' + exchange + '_' + kind
    if not _os.path.isdir(directory):
        _os.mkdir(directory)
    ftp = _FTP('ftp.abinit.org')  # connect to host, default port
    ftp.login()  # user anonymous, passwd anonymous@
    for i in range(1, 113):
        filename = psp_name(i, exchange, kind)
        target = directory + '/' + filename
        if _os.path.isfile(target):
            continue
        print('Getting...' + filename)
        # NOTE: the original 'while nofile' retry loop always ran exactly
        # once (every branch cleared the flag), so it is flattened here.
        try:
            # BUGFIX: context manager closes the local file handle, which
            # the original leaked via open(...).write
            with open(target, 'wb') as out:
                ftp.retrbinary('RETR ' + get_ftp_psp(i, exchange, kind) +
                               filename, out.write)
            # a zero-byte file means nothing was served for this element
            if _os.path.getsize(target) == 0:
                _os.remove(target)
        except _ftp_errors + (ValueError,):
            # BUGFIX: ftplib raises ftplib.all_errors, not ValueError;
            # catch both, drop the partial file, and reconnect
            print('Could not download ' + filename)
            ftp.close()
            if _os.path.isfile(target):
                _os.remove(target)
            ftp = _FTP('ftp.abinit.org')  # connect to host, default port
            ftp.login()  # user anonymous, passwd anonymous@
    ftp.close()
def connect(self):
    """Yield a logged-in FTP session to ``self.host``, closing it on exit.

    ``self.host`` may carry an explicit port ("host:port"); the whole
    operation is bounded by ``Timeout(self.timeout)``.
    """
    with Timeout(self.timeout):
        # urlsplit needs a leading "//" to treat self.host as a netloc
        parsed = urllib.parse.urlsplit('//' + self.host)
        if parsed.port:
            ftp = _FTP()
            ftp.connect(parsed.hostname, parsed.port)
        else:
            ftp = _FTP(parsed.hostname)
        ftp.login(self.user, self.password)
        try:
            yield ftp
        finally:
            ftp.close()
def list_order_files(user, order_id):
    """
    List the files in an ARM Archive order.

    Parameters
    ----------
    user : str
        Username
    order_id : str or int
        Order id.

    Returns
    -------
    files : list
        List of files in the order.
    """
    # log into the FTP server
    ftp = _FTP(_ARM_FTP)
    ftp.login()
    try:
        # change to the correct directory
        ftp.cwd(user)
        ftp.cwd(str(order_id))
        return ftp.nlst()
    finally:
        # BUGFIX: the original never closed the connection; end the
        # session cleanly as order_download() does
        ftp.quit()
def order_download(user, order_id, files=None):
    """
    Download files from a ARM Archive order.

    Files are stored in the current working directory, no return.

    Parameters
    ----------
    user : str
        Username
    order_id : str or int
        Order id.
    files : list, str or None, optional.
        Files to download, either a single file, a list of files, or None
        to retrieve all files in the order.
    """
    # log into the FTP server
    ftp = _FTP(_ARM_FTP)
    ftp.login()
    try:
        # change to the correct directory
        ftp.cwd(user)
        ftp.cwd(str(order_id))
        # grab the files
        if files is None:
            files = ftp.nlst()
        if isinstance(files, str):
            files = [files]
        for filename in files:
            print("Retrieving:", filename)
            # BUGFIX: context manager closes the local file handle, which
            # the original leaked via open(filename, 'wb').write
            with open(filename, 'wb') as out:
                ftp.retrbinary('RETR ' + filename, out.write)
    finally:
        # BUGFIX: quit even if a download raises, so the session is not leaked
        ftp.quit()
    return
def __init__(self, target, opts):
    """Initialise base scanner state and attach an FTP client."""
    Base.__init__(self, target, opts)
    # ftplib FTP instance; 3 s timeout keeps unresponsive hosts from blocking
    self.ftp = _FTP(timeout=3)
    return
def connect(self):
    """Yield an authenticated FTP session; always close it afterwards.

    Connection and login are bounded by ``Timeout(self.timeout)``.
    """
    with Timeout(self.timeout):
        session = _FTP(self.host)
        session.login(self.user, self.password)
        try:
            yield session
        finally:
            session.close()
def _avail_files_dict(repository):
    """Return ``{name: facts}`` for the entries of an FTP directory.

    ``repository`` is a sequence whose first item is the host and whose
    remaining items are sub-directories to descend into, in order.
    The '.' and '..' entries are excluded.
    """
    host, *subdirs = repository
    # Login to the FTP and get available files
    conn = _FTP(host)
    conn.login()
    # Go to subdirectories
    for sub in subdirs:
        conn.cwd(sub)
    # Build the listing from MLSD (name, facts) pairs
    listing = {name: facts
               for name, facts in conn.mlsd()
               if name not in ('.', '..')}
    conn.close()
    return listing
def __init__(self, dwx_ftp_user='******', dwx_ftp_pass='******', dwx_ftp_hostname='<insert Darwinex Tick Data FTP host>', dwx_ftp_port=21):
    """Connect to the Darwinex Tick Data FTP server and cache asset names.

    Accepts a hostname with or without an ``ftp://`` scheme prefix.
    ``dwx_ftp_port`` is accepted for interface compatibility but the
    connection is made on the default FTP port.
    """
    # Strip an explicit ftp:// scheme if the caller supplied one
    if dwx_ftp_hostname.startswith('ftp://'):
        dwx_ftp_hostname = dwx_ftp_hostname[6:]
    # Dictionary DB to hold dictionary objects in FX/Hour format
    self._asset_db = {}
    self._ftpObj = _FTP(dwx_ftp_hostname)
    self._ftpObj.login(dwx_ftp_user, dwx_ftp_pass)
    self._virtual_dl = None
    self.available_assets = self._dir('')
    self._widgets_available = True
    # retry policy used by downstream download helpers
    self.num_retries = 3
    self.await_time = 10
    print('Connected Darwinex Ticks Data Server')
def list_orders_ready(user):
    """
    List all Archive orders which are ready for download.

    Parameters
    ----------
    user : str
        Username

    Returns
    -------
    order_ids : list
        List of open order ids for the user.
    """
    # log into the FTP server, change to users directory
    ftp = _FTP(_ARM_FTP)
    ftp.login()
    try:
        ftp.cwd(user)
        return ftp.nlst()
    finally:
        # BUGFIX: close the session (the original leaked the connection)
        ftp.quit()
def getFromENA(self, run_acc_list, ftp_server_url = 'ftp.sra.ebi.ac.uk', local_reads_path = ['reads']):
    '''
    Given a list of 'run' accession numbers for paired end short read analyses,
    download the read files from the European Nucleotide Archive.

    If using a mirror server, supply an alternative for 'ftp_server_url'.

    'local_reads_path' can be a path string or list or folder names.
    (The list default is never mutated, only rebound, so the shared
    mutable default is harmless here.)
    '''
    if isinstance(local_reads_path, list):
        local_reads_path = _os.path.sep.join(local_reads_path)
    if not _os.path.exists(local_reads_path):
        _os.makedirs(local_reads_path)
    print('Logging in to %s' % ftp_server_url)
    ftp = _FTP(ftp_server_url)
    # anonymous login
    print(ftp.login())

    def check_connection(ftp):
        # NOOP is a cheap keep-alive probe
        # http://docs.python.org/2/library/ftplib.html
        try:
            print('FTP: %s' % ftp.voidcmd("NOOP"))
            return(True)
        except IOError as e:
            print('Seems to be a problem with the connection to FTP server:')
            print('I/O error({0}): {1}'.format(e.errno, e.strerror) )
            return(False)

    def calc_checksum(filepath):
        # stream in 64 KiB chunks so large fastq files do not fill memory;
        # BUGFIX: 'with' closes the handle (the original never did)
        hasher = _md5()
        with open(filepath, 'rb') as handle:
            buff = handle.read(65536)
            while len(buff) > 0:
                hasher.update(buff)
                buff = handle.read(65536)
        return(hasher.hexdigest())

    downloaded_read_files = {}
    start_time = _time.time()
    failed = []
    for cnum,run_acc in enumerate(run_acc_list):
        query_url_base = 'http://www.ebi.ac.uk/ena/data/warehouse/search?query='
        success = False
        tries = 0
        max_tries = 5
        while not success:
            rest_req = '"run_accession=%s"&result=read_run&fields=fastq_ftp,fastq_md5&display=report' % run_acc
            print('Sending query to ENA:\n%s' % rest_req)
            result = _urllib2.urlopen(query_url_base + rest_req).read()
            print('ENA accession numbers query result:\n%s' % result)
            # NOTE(review): '== 7' assumes a fixed report shape from ENA;
            # brittle — confirm against the current warehouse API
            if result.count('ERR') == 7:
                success = True
            else:
                print('Query result from ENA was unexpected on attempt %s of %s' % (tries, max_tries))
                _time.sleep(0.5)
                tries += 1
                if tries == max_tries:
                    print('Attempt %s failed. Try again later and if problem persists, report bug.' % tries)
                    failed += [run_acc]
                    break
                    #_sys.exit(1)
        if not success:
            continue
        # parse the tab-separated report: last line holds ftp paths and md5s
        md5s = result.split('\n')[-2].split('\t')[-1][:-1].split(';')
        ENA_paths = result.split('\n')[-2].split('\t')[-2][:-1].split(';')
        ENA_reads_pair_paths = {}
        ENA_reads_pair_paths[1] = ENA_paths[0].replace(ftp_server_url, '')
        ENA_reads_pair_paths[2] = ENA_paths[1].replace(ftp_server_url, '')
        local_reads_pair_paths = {}
        local_reads_pair_paths[1] = local_reads_path + \
                                    _os.path.sep + \
                                    ENA_reads_pair_paths[1].split('/')[-1]
        local_reads_pair_paths[2] = local_reads_path + \
                                    _os.path.sep + \
                                    ENA_reads_pair_paths[2].split('/')[-1]
        downloaded_read_files[run_acc] = {}
        for f in (1,2):
            # ensure connection is still open
            while not check_connection(ftp):
                _sleep(0.5)
                print('Attempting to re-establish connection . . .')
                ftp = _FTP(ftp_server_url)
                # anonymous login
                print(ftp.login())
            expected_checksum = md5s[f - 1]
            exists = _os.path.exists(local_reads_pair_paths[f])
            if exists:
                print('File %s for %s exists locally: %s' % (f, run_acc, local_reads_pair_paths[f]))
                actual_checksum = calc_checksum(local_reads_pair_paths[f])
                if actual_checksum == expected_checksum:
                    print('File checksum matches: %s. Skipping download' % (expected_checksum))
                    downloaded_read_files[run_acc][f] = local_reads_pair_paths[f]
                    continue
                else:
                    print('Checksum mismatch')
            print('Downloading via %s: %s' % (ftp_server_url, ENA_reads_pair_paths[f]))
            # BUGFIX: context manager closes (and flushes) the local file
            # before the checksum below is computed; the original leaked
            # the handle via open(...).write
            with open(local_reads_pair_paths[f], 'wb') as out:
                res = ftp.retrbinary('RETR %s' % ENA_reads_pair_paths[f], out.write)
            print('FTP: %s' % res)
            print('Calculating checksum . . .')
            actual_checksum = calc_checksum(local_reads_pair_paths[f])
            if actual_checksum == expected_checksum:
                print('File checksum matches: %s.' % (expected_checksum))
                downloaded_read_files[run_acc][f] = local_reads_pair_paths[f]
            else:
                # BUGFIX: the original printed the literal '%s' — supply the path
                print('Checksum mismatch for: %s' % local_reads_pair_paths[f])
        if len(run_acc_list) > 1:
            # report durations, time left etc
            _report_time(start_time, cnum, len(run_acc_list))
    if len(failed) > 0:
        print('WARNING: some accession numbers did not return a result from ENA')
        print('Try searching http://www.ebi.ac.uk/ena in a web-browser for:')
        print(', '.join(failed))
    self.read_files = downloaded_read_files
def __init__(self, host, initpath='.'):
    """Open an anonymous FTP session to *host* and move to *initpath*."""
    from ftplib import FTP as _FTP
    connection = _FTP(host)
    connection.login('anonymous', '*****@*****.**')
    connection.cwd(initpath)
    self.conn = connection
def getFromENA(self, run_acc_list, ftp_server_url='ftp.sra.ebi.ac.uk', local_reads_path=['reads']):
    '''
    Given a list of 'run' accession numbers for paired end short read analyses,
    download the read files from the European Nucleotide Archive.

    If using a mirror server, supply an alternative for 'ftp_server_url'.

    'local_reads_path' can be a path string or list or folder names.
    (The list default is never mutated, only rebound, so the shared
    mutable default is harmless here.)
    '''
    if isinstance(local_reads_path, list):
        local_reads_path = _os.path.sep.join(local_reads_path)
    if not _os.path.exists(local_reads_path):
        _os.makedirs(local_reads_path)
    print('Logging in to %s' % ftp_server_url)
    ftp = _FTP(ftp_server_url)
    # anonymous login
    print(ftp.login())

    def check_connection(ftp):
        # NOOP is a cheap keep-alive probe
        # http://docs.python.org/2/library/ftplib.html
        try:
            print('FTP: %s' % ftp.voidcmd("NOOP"))
            return (True)
        except IOError as e:
            print(
                'Seems to be a problem with the connection to FTP server:')
            print('I/O error({0}): {1}'.format(e.errno, e.strerror))
            return (False)

    def calc_checksum(filepath):
        # stream in 64 KiB chunks to bound memory use;
        # BUGFIX: 'with' closes the handle (the original never did)
        hasher = _md5()
        with open(filepath, 'rb') as handle:
            buff = handle.read(65536)
            while len(buff) > 0:
                hasher.update(buff)
                buff = handle.read(65536)
        return (hasher.hexdigest())

    downloaded_read_files = {}
    start_time = _time.time()
    failed = []
    for cnum, run_acc in enumerate(run_acc_list):
        query_url_base = 'http://www.ebi.ac.uk/ena/data/warehouse/search?query='
        success = False
        tries = 0
        max_tries = 5
        while not success:
            rest_req = '"run_accession=%s"&result=read_run&fields=fastq_ftp,fastq_md5&display=report' % run_acc
            print('Sending query to ENA:\n%s' % rest_req)
            result = _urllib2.urlopen(query_url_base + rest_req).read()
            print('ENA accession numbers query result:\n%s' % result)
            # NOTE(review): '== 7' assumes a fixed report shape from ENA;
            # brittle — confirm against the current warehouse API
            if result.count('ERR') == 7:
                success = True
            else:
                print(
                    'Query result from ENA was unexpected on attempt %s of %s'
                    % (tries, max_tries))
                _time.sleep(0.5)
                tries += 1
                if tries == max_tries:
                    print(
                        'Attempt %s failed. Try again later and if problem persists, report bug.'
                        % tries)
                    failed += [run_acc]
                    break
                    #_sys.exit(1)
        if not success:
            continue
        # parse the tab-separated report: last line holds ftp paths and md5s
        md5s = result.split('\n')[-2].split('\t')[-1][:-1].split(';')
        ENA_paths = result.split('\n')[-2].split('\t')[-2][:-1].split(';')
        ENA_reads_pair_paths = {}
        ENA_reads_pair_paths[1] = ENA_paths[0].replace(ftp_server_url, '')
        ENA_reads_pair_paths[2] = ENA_paths[1].replace(ftp_server_url, '')
        local_reads_pair_paths = {}
        local_reads_pair_paths[1] = local_reads_path + \
            _os.path.sep + \
            ENA_reads_pair_paths[1].split('/')[-1]
        local_reads_pair_paths[2] = local_reads_path + \
            _os.path.sep + \
            ENA_reads_pair_paths[2].split('/')[-1]
        downloaded_read_files[run_acc] = {}
        for f in (1, 2):
            # ensure connection is still open
            while not check_connection(ftp):
                _sleep(0.5)
                print('Attempting to re-establish connection . . .')
                ftp = _FTP(ftp_server_url)
                # anonymous login
                print(ftp.login())
            expected_checksum = md5s[f - 1]
            exists = _os.path.exists(local_reads_pair_paths[f])
            if exists:
                print('File %s for %s exists locally: %s' %
                      (f, run_acc, local_reads_pair_paths[f]))
                actual_checksum = calc_checksum(local_reads_pair_paths[f])
                if actual_checksum == expected_checksum:
                    print('File checksum matches: %s. Skipping download' %
                          (expected_checksum))
                    downloaded_read_files[run_acc][
                        f] = local_reads_pair_paths[f]
                    continue
                else:
                    print('Checksum mismatch')
            print('Downloading via %s: %s' %
                  (ftp_server_url, ENA_reads_pair_paths[f]))
            # BUGFIX: context manager closes (and flushes) the local file
            # before the checksum below is computed; the original leaked
            # the handle via open(...).write
            with open(local_reads_pair_paths[f], 'wb') as out:
                res = ftp.retrbinary(
                    'RETR %s' % ENA_reads_pair_paths[f], out.write)
            print('FTP: %s' % res)
            print('Calculating checksum . . .')
            actual_checksum = calc_checksum(local_reads_pair_paths[f])
            if actual_checksum == expected_checksum:
                print('File checksum matches: %s.' % (expected_checksum))
                downloaded_read_files[run_acc][f] = local_reads_pair_paths[f]
            else:
                # BUGFIX: the original printed the literal '%s' — supply the path
                print('Checksum mismatch for: %s' % local_reads_pair_paths[f])
        if len(run_acc_list) > 1:
            # report durations, time left etc
            _report_time(start_time, cnum, len(run_acc_list))
    if len(failed) > 0:
        print(
            'WARNING: some accession numbers did not return a result from ENA'
        )
        print(
            'Try searching http://www.ebi.ac.uk/ena in a web-browser for:')
        print(', '.join(failed))
    self.read_files = downloaded_read_files
def _create_ftp(self) -> _FTP:
    """Open a fresh FTP connection to ``self.host`` with the stored options."""
    options = self.__kwargs
    return _FTP(self.host, **options)