Esempio n. 1
0
def get_all_psps(basedir, exchange, kind):
    """
    Download the pseudopotential files for one exchange/kind pair.

    Files are fetched by anonymous FTP from ``ftp.abinit.org`` and stored in
    ``<basedir>/<exchange>_<kind>``, which is created when missing.  Indices
    1 to 112 are visited; a file that already exists locally is skipped and
    a download that ends up empty is deleted again.

    Parameters
    ----------
    basedir : str
        Directory under which the ``<exchange>_<kind>`` folder lives.
    exchange : str
        Exchange label, forwarded to ``psp_name`` and ``get_ftp_psp``.
    kind : str
        Pseudopotential kind, forwarded to ``psp_name`` and ``get_ftp_psp``.
    """
    directory = basedir + '/' + exchange + '_' + kind
    if not _os.path.isdir(directory):
        _os.mkdir(directory)

    ftp = _FTP('ftp.abinit.org')  # connect to host, default port
    ftp.login()  # user anonymous, passwd anonymous@
    try:
        for i in range(1, 113):
            filename = psp_name(i, exchange, kind)
            local_path = directory + '/' + filename
            if _os.path.isfile(local_path):
                continue

            print('Getting...' + filename)
            try:
                remote_path = get_ftp_psp(i, exchange, kind) + filename
                # Close the local file before inspecting it so that the size
                # check below sees everything that was written.
                with open(local_path, 'wb') as handle:
                    ftp.retrbinary('RETR ' + remote_path, handle.write)
                if _os.path.getsize(local_path) == 0:
                    _os.remove(local_path)
            except ValueError:
                # NOTE(review): only ValueError is treated as a failed
                # download; confirm which exceptions the helpers and the FTP
                # client raise here.
                print('Could not download ' + filename)
                ftp.close()
                if _os.path.isfile(local_path):
                    _os.remove(local_path)
                # Start a fresh session for the remaining files.
                ftp = _FTP('ftp.abinit.org')  # connect to host, default port
                ftp.login()  # user anonymous, passwd anonymous@
    finally:
        # Release the control connection even when an unexpected error
        # aborts the loop.
        ftp.close()
Esempio n. 2
0
    def connect(self):
        """Yield a logged-in FTP connection and close it afterwards.

        ``self.host`` may carry an optional ``:port`` suffix; when a port is
        present it is passed explicitly to ``connect``.  Connecting and
        logging in run inside ``Timeout(self.timeout)``.

        This is a generator with a single ``yield``; presumably it is wrapped
        by a context-manager decorator outside this view - TODO confirm.
        """
        with Timeout(self.timeout):
            # parse ipaddress with urllib, "//" needed
            result = urllib.parse.urlsplit('//' + self.host)
            port = result.port
            if port:
                ftp = _FTP()
            else:
                ftp = _FTP(result.hostname)
            try:
                if port:
                    ftp.connect(result.hostname, port)
                ftp.login(self.user, self.password)
            except BaseException:
                # Do not leak the control connection when connecting or
                # logging in fails or is interrupted.
                ftp.close()
                raise

        try:
            yield ftp
        finally:
            ftp.close()
Esempio n. 3
0
def list_order_files(user, order_id):
    """
    List the files in an ARM Archive order.

    Parameters
    ----------
    user : str
        Username
    order_id : str or int
        Order id.

    Returns
    -------
    files : list
        List of files in the order.

    """
    # log into the FTP server
    ftp = _FTP(_ARM_FTP)
    try:
        ftp.login()

        # change to the correct directory
        ftp.cwd(user)
        ftp.cwd(str(order_id))

        return ftp.nlst()
    finally:
        # always release the connection, whether or not the listing worked
        ftp.close()
Esempio n. 4
0
def order_download(user, order_id, files=None):
    """
    Download files from an ARM Archive order.

    Files are stored in the current working directory, no return.

    Parameters
    ----------
    user : str
        Username
    order_id : str or int
        Order id.
    files : list, str or None, optional.
        Files to download, either a single file, a list of files, or None
        to retrieve all files in the order.

    """
    # log into the FTP server
    ftp = _FTP(_ARM_FTP)
    try:
        ftp.login()

        # change to the correct directory
        ftp.cwd(user)
        ftp.cwd(str(order_id))

        # grab the files
        if files is None:
            files = ftp.nlst()
        if isinstance(files, str):
            files = [files]
        for filename in files:
            print("Retrieving:", filename)
            # close each local file as soon as its transfer is finished
            with open(filename, 'wb') as handle:
                ftp.retrbinary('RETR ' + filename, handle.write)
        ftp.quit()
    finally:
        # no-op after a successful quit(); releases the socket on failure
        ftp.close()
    return
Esempio n. 5
0
    def __init__(self, target, opts):
        """Initialise the base class, then attach an FTP client.

        ``target`` and ``opts`` are handed unchanged to ``Base.__init__``.
        """
        Base.__init__(self, target, opts)

        # ftplib FTP instance, built without a host and with a timeout of 3
        client = _FTP(timeout=3)
        self.ftp = client
Esempio n. 6
0
    def connect(self):
        """Yield a logged-in FTP connection and close it afterwards.

        Connecting to ``self.host`` and logging in with ``self.user`` and
        ``self.password`` run inside ``Timeout(self.timeout)``.

        This is a generator with a single ``yield``; presumably it is wrapped
        by a context-manager decorator outside this view - TODO confirm.
        """
        with Timeout(self.timeout):
            ftp = _FTP(self.host)
            try:
                ftp.login(self.user, self.password)
            except BaseException:
                # Do not leak the control connection when the login fails or
                # is interrupted.
                ftp.close()
                raise

        try:
            yield ftp
        finally:
            ftp.close()
Esempio n. 7
0
    def connect(self):
        """Yield a logged-in FTP connection and close it afterwards.

        Connecting to ``self.host`` and logging in with ``self.user`` and
        ``self.password`` run inside ``Timeout(self.timeout)``.

        This is a generator with a single ``yield``; presumably it is wrapped
        by a context-manager decorator outside this view - TODO confirm.
        """
        with Timeout(self.timeout):
            ftp = _FTP(self.host)
            try:
                ftp.login(self.user, self.password)
            except BaseException:
                # Do not leak the control connection when the login fails or
                # is interrupted.
                ftp.close()
                raise

        try:
            yield ftp
        finally:
            ftp.close()
Esempio n. 8
0
def _avail_files_dict(repository):
    """Return the entries of a remote FTP directory as a dictionary.

    Parameters
    ----------
    repository : sequence
        First element is the FTP host; every following element is a
        directory entered in turn with ``cwd`` after an anonymous login.

    Returns
    -------
    afd : dict
        Maps the first element of each ``mlsd()`` tuple to its second
        element, leaving out the ``'.'`` and ``'..'`` entries.
    """
    # Login to the FTP and get available files
    iftp = _FTP(repository[0])
    try:
        iftp.login()
        # Go to subdirectories
        for idir in repository[1:]:
            iftp.cwd(idir)

        # Collect the MLSD listing of the final directory
        afd = {entry[0]: entry[1]
               for entry in iftp.mlsd()
               if entry[0] not in ('.', '..')}
    finally:
        # Release the connection even when login, cwd or the listing fails
        iftp.close()

    return afd
Esempio n. 9
0
    def __init__(self, dwx_ftp_user='******',
                 dwx_ftp_pass='******',
                 dwx_ftp_hostname='<insert Darwinex Tick Data FTP host>',
                 dwx_ftp_port=21):
        """Connect and log in to the tick-data FTP server.

        Parameters
        ----------
        dwx_ftp_user : str
            FTP user name.
        dwx_ftp_pass : str
            FTP password.
        dwx_ftp_hostname : str
            FTP host; a leading ``ftp://`` is stripped.
        dwx_ftp_port : int
            Port of the FTP server.  A falsy value leaves the port choice to
            the FTP client.
        """

        if dwx_ftp_hostname[:6] == 'ftp://':
            dwx_ftp_hostname = dwx_ftp_hostname[6:]

        # # Dictionary DB to hold dictionary objects in FX/Hour format
        # self._asset_db = {}

        # Honour the requested port instead of always using the client's
        # default one.
        if dwx_ftp_port:
            self._ftpObj = _FTP()
            self._ftpObj.connect(dwx_ftp_hostname, int(dwx_ftp_port))
        else:
            self._ftpObj = _FTP(dwx_ftp_hostname)
        self._ftpObj.login(dwx_ftp_user, dwx_ftp_pass)
        self._virtual_dl = None
        self.available_assets = self._dir('')
        self._widgets_available = True
        self.num_retries = 3
        self.await_time = 10
        print('Connected Darwinex Ticks Data Server')
Esempio n. 10
0
def list_orders_ready(user):
    """
    List all Archive orders which are ready for download.

    Parameters
    ----------
    user : str
        Username

    Returns
    -------
    order_ids : list
        List of open order ids for the user.

    """
    # log into the FTP server, change to users directory
    ftp = _FTP(_ARM_FTP)
    try:
        ftp.login()
        ftp.cwd(user)
        return ftp.nlst()
    finally:
        # always release the connection, whether or not the listing worked
        ftp.close()
Esempio n. 11
0
    def getFromENA(self, run_acc_list,
                         ftp_server_url = 'ftp.sra.ebi.ac.uk',
                         local_reads_path = ['reads']):
        '''
        Given a list of 'run' accession numbers for paired end short read analyses,
        download the read files from the European Nucleotide Archive.

        For each accession the ENA warehouse is asked for the FTP paths and
        MD5 checksums of the two read files.  Each file is fetched from
        'ftp_server_url' unless a local copy with a matching checksum exists.

        If using a mirror server, supply an alternative for 'ftp_server_url'.

        'local_reads_path' can be a path string or list of folder names.

        Files whose checksum was verified are stored in self.read_files as
        {run_accession: {1: local_path, 2: local_path}}.
        '''
        if isinstance(local_reads_path, list):
            local_reads_path = _os.path.sep.join(local_reads_path)

        if not _os.path.exists(local_reads_path):
            _os.makedirs(local_reads_path)

        print('Logging in to %s' % ftp_server_url)
        ftp = _FTP(ftp_server_url)
        # anonymous login
        print(ftp.login())

        def check_connection(ftp):
            # Probe the control connection with a NOOP command.
            try:
                print('FTP: %s' % ftp.voidcmd("NOOP"))
                # http://docs.python.org/2/library/ftplib.html
                return(True)
            except IOError as e:
                print('Seems to be a problem with the connection to FTP server:')
                print('I/O error({0}): {1}'.format(e.errno, e.strerror) )
                return(False)

        def calc_checksum(filepath):
            # Hex digest of the file contents, read in 65536-byte chunks.
            hasher = _md5()
            with open(filepath, 'rb') as handle:
                buff = handle.read(65536)
                while len(buff) > 0:
                    hasher.update(buff)
                    buff = handle.read(65536)

            return(hasher.hexdigest())

        downloaded_read_files = {}

        start_time = _time.time()
        failed = []
        for cnum,run_acc in enumerate(run_acc_list):

            query_url_base = 'http://www.ebi.ac.uk/ena/data/warehouse/search?query='
            success = False
            tries = 0
            max_tries = 5
            while not success:
                rest_req = '"run_accession=%s"&result=read_run&fields=fastq_ftp,fastq_md5&display=report' % run_acc
                print('Sending query to ENA:\n%s' % rest_req)
                result = _urllib2.urlopen(query_url_base + rest_req).read()
                print('ENA accession numbers query result:\n%s' % result)
                # NOTE(review): a reply is accepted only when 'ERR' occurs
                # exactly seven times in it - confirm this also holds for
                # accessions that do not start with 'ERR'.
                if result.count('ERR') == 7:
                    success = True
                else:
                    # count the attempt first so the message is 1-based
                    tries += 1
                    print('Query result from ENA was unexpected on attempt %s of %s' % (tries, max_tries))
                    _time.sleep(0.5)
                    if tries == max_tries:
                        print('Attempt %s failed. Try again later and if problem persists, report bug.' % tries)
                        failed += [run_acc]
                        break

            if not success:
                continue

            # The data row is the last-but-one line of the report; the final
            # character of each field is dropped before splitting on ';'.
            report_fields = result.split('\n')[-2].split('\t')
            md5s = report_fields[-1][:-1].split(';')
            ENA_paths = report_fields[-2][:-1].split(';')

            ENA_reads_pair_paths = {}
            ENA_reads_pair_paths[1] = ENA_paths[0].replace(ftp_server_url, '')
            ENA_reads_pair_paths[2] = ENA_paths[1].replace(ftp_server_url, '')

            local_reads_pair_paths = {}
            local_reads_pair_paths[1] = local_reads_path + \
                                        _os.path.sep + \
                                        ENA_reads_pair_paths[1].split('/')[-1]
            local_reads_pair_paths[2] = local_reads_path + \
                                        _os.path.sep + \
                                        ENA_reads_pair_paths[2].split('/')[-1]

            downloaded_read_files[run_acc] = {}

            for f in (1,2):
                # ensure connection is still open
                while not check_connection(ftp):
                    _time.sleep(0.5)
                    print('Attempting to re-establish connection . . .')
                    ftp = _FTP(ftp_server_url)
                    # anonymous login
                    print(ftp.login())

                expected_checksum = md5s[f - 1]

                exists = _os.path.exists(local_reads_pair_paths[f])
                if exists:
                    print('File %s for %s exists locally: %s' % (f, run_acc, local_reads_pair_paths[f]))
                    actual_checksum = calc_checksum(local_reads_pair_paths[f])
                    if actual_checksum == expected_checksum:
                        print('File checksum matches: %s. Skipping download' % (expected_checksum))
                        downloaded_read_files[run_acc][f] = local_reads_pair_paths[f]
                        continue
                    else:
                        print('Checksum mismatch')

                print('Downloading via %s: %s' % (ftp_server_url, ENA_reads_pair_paths[f]))
                # Close the local file before hashing it so the checksum is
                # computed over everything that was written.
                with open(local_reads_pair_paths[f], 'wb') as local_file:
                    res = ftp.retrbinary('RETR %s' % ENA_reads_pair_paths[f],
                                         local_file.write)
                print('FTP: %s' % res)

                print('Calculating checksum . . .')
                actual_checksum = calc_checksum(local_reads_pair_paths[f])

                if actual_checksum == expected_checksum:
                    print('File checksum matches: %s.' % (expected_checksum))
                    downloaded_read_files[run_acc][f] = local_reads_pair_paths[f]
                else:
                    print('Checksum mismatch for: %s' % local_reads_pair_paths[f])

            if len(run_acc_list) > 1:
                # report durations, time left etc
                _report_time(start_time, cnum, len(run_acc_list))

        if len(failed) > 0:
            print('WARNING: some accession numbers did not return a result from ENA')
            print('Try searching http://www.ebi.ac.uk/ena in a web-browser for:')
            print(', '.join(failed))

        self.read_files = downloaded_read_files
Esempio n. 12
0
 def __init__(self, host, initpath='.'):
     """Open an FTP session on ``host``, log in and change to ``initpath``."""
     import ftplib

     session = self.conn = ftplib.FTP(host)
     session.login('anonymous', '*****@*****.**')
     session.cwd(initpath)
Esempio n. 13
0
    def getFromENA(self,
                   run_acc_list,
                   ftp_server_url='ftp.sra.ebi.ac.uk',
                   local_reads_path=['reads']):
        '''
        Given a list of 'run' accession numbers for paired end short read analyses,
        download the read files from the European Nucleotide Archive.

        For each accession the ENA warehouse is asked for the FTP paths and
        MD5 checksums of the two read files.  Each file is fetched from
        'ftp_server_url' unless a local copy with a matching checksum exists.

        If using a mirror server, supply an alternative for 'ftp_server_url'.

        'local_reads_path' can be a path string or list of folder names.

        Files whose checksum was verified are stored in self.read_files as
        {run_accession: {1: local_path, 2: local_path}}.
        '''
        if isinstance(local_reads_path, list):
            local_reads_path = _os.path.sep.join(local_reads_path)

        if not _os.path.exists(local_reads_path):
            _os.makedirs(local_reads_path)

        print('Logging in to %s' % ftp_server_url)
        ftp = _FTP(ftp_server_url)
        # anonymous login
        print(ftp.login())

        def check_connection(ftp):
            # Probe the control connection with a NOOP command.
            try:
                print('FTP: %s' % ftp.voidcmd("NOOP"))
                # http://docs.python.org/2/library/ftplib.html
                return (True)
            except IOError as e:
                print(
                    'Seems to be a problem with the connection to FTP server:')
                print('I/O error({0}): {1}'.format(e.errno, e.strerror))
                return (False)

        def calc_checksum(filepath):
            # Hex digest of the file contents, read in 65536-byte chunks.
            hasher = _md5()
            with open(filepath, 'rb') as handle:
                buff = handle.read(65536)
                while len(buff) > 0:
                    hasher.update(buff)
                    buff = handle.read(65536)

            return (hasher.hexdigest())

        downloaded_read_files = {}

        start_time = _time.time()
        failed = []
        for cnum, run_acc in enumerate(run_acc_list):

            query_url_base = 'http://www.ebi.ac.uk/ena/data/warehouse/search?query='
            success = False
            tries = 0
            max_tries = 5
            while not success:
                rest_req = '"run_accession=%s"&result=read_run&fields=fastq_ftp,fastq_md5&display=report' % run_acc
                print('Sending query to ENA:\n%s' % rest_req)
                result = _urllib2.urlopen(query_url_base + rest_req).read()
                print('ENA accession numbers query result:\n%s' % result)
                # NOTE(review): a reply is accepted only when 'ERR' occurs
                # exactly seven times in it - confirm this also holds for
                # accessions that do not start with 'ERR'.
                if result.count('ERR') == 7:
                    success = True
                else:
                    # count the attempt first so the message is 1-based
                    tries += 1
                    print(
                        'Query result from ENA was unexpected on attempt %s of %s'
                        % (tries, max_tries))
                    _time.sleep(0.5)
                    if tries == max_tries:
                        print(
                            'Attempt %s failed. Try again later and if problem persists, report bug.'
                            % tries)
                        failed += [run_acc]
                        break

            if not success:
                continue

            # The data row is the last-but-one line of the report; the final
            # character of each field is dropped before splitting on ';'.
            report_fields = result.split('\n')[-2].split('\t')
            md5s = report_fields[-1][:-1].split(';')
            ENA_paths = report_fields[-2][:-1].split(';')

            ENA_reads_pair_paths = {}
            ENA_reads_pair_paths[1] = ENA_paths[0].replace(ftp_server_url, '')
            ENA_reads_pair_paths[2] = ENA_paths[1].replace(ftp_server_url, '')

            local_reads_pair_paths = {}
            local_reads_pair_paths[1] = local_reads_path + \
                                        _os.path.sep + \
                                        ENA_reads_pair_paths[1].split('/')[-1]
            local_reads_pair_paths[2] = local_reads_path + \
                                        _os.path.sep + \
                                        ENA_reads_pair_paths[2].split('/')[-1]

            downloaded_read_files[run_acc] = {}

            for f in (1, 2):
                # ensure connection is still open
                while not check_connection(ftp):
                    _time.sleep(0.5)
                    print('Attempting to re-establish connection . . .')
                    ftp = _FTP(ftp_server_url)
                    # anonymous login
                    print(ftp.login())

                expected_checksum = md5s[f - 1]

                exists = _os.path.exists(local_reads_pair_paths[f])
                if exists:
                    print('File %s for %s exists locally: %s' %
                          (f, run_acc, local_reads_pair_paths[f]))
                    actual_checksum = calc_checksum(local_reads_pair_paths[f])
                    if actual_checksum == expected_checksum:
                        print('File checksum matches: %s. Skipping download' %
                              (expected_checksum))
                        downloaded_read_files[run_acc][
                            f] = local_reads_pair_paths[f]
                        continue
                    else:
                        print('Checksum mismatch')

                print('Downloading via %s: %s' %
                      (ftp_server_url, ENA_reads_pair_paths[f]))
                # Close the local file before hashing it so the checksum is
                # computed over everything that was written.
                with open(local_reads_pair_paths[f], 'wb') as local_file:
                    res = ftp.retrbinary(
                        'RETR %s' % ENA_reads_pair_paths[f],
                        local_file.write)
                print('FTP: %s' % res)

                print('Calculating checksum . . .')
                actual_checksum = calc_checksum(local_reads_pair_paths[f])

                if actual_checksum == expected_checksum:
                    print('File checksum matches: %s.' % (expected_checksum))
                    downloaded_read_files[run_acc][f] = local_reads_pair_paths[
                        f]
                else:
                    print('Checksum mismatch for: %s' %
                          local_reads_pair_paths[f])

            if len(run_acc_list) > 1:
                # report durations, time left etc
                _report_time(start_time, cnum, len(run_acc_list))

        if len(failed) > 0:
            print(
                'WARNING: some accession numbers did not return a result from ENA'
            )
            print(
                'Try searching http://www.ebi.ac.uk/ena in a web-browser for:')
            print(', '.join(failed))

        self.read_files = downloaded_read_files
Esempio n. 14
0
 def _create_ftp(self) -> _FTP:
     """Build a new FTP client for ``self.host`` using the stored keyword options."""
     host = self.host
     options = self.__kwargs
     return _FTP(host, **options)