Code Example #1
    def test_report_remember_cookie_duration(self, mock_reportbuild_run):

        duration = self.app.config['PERMANENT_SESSION_LIFETIME']
        # test the pdf page. We are unauthorized, so this should give us an error:
        with self.app.test_request_context():
            app = self.app.test_client()
            # If we add routes with a trailing slash, we should
            # add follow_redirects=True to app.get. See
            # http://flask.pocoo.org/docs/0.11/quickstart/#routing
            res = app.get("/ZE_2012/pdf", follow_redirects=True)
            # a few basic asserts; the main test is that nothing raises.
            # we should have an html page:
            assert res.status_code == 401
            assert mock_reportbuild_run.call_count == 0
        
            # now try to login:
            # with a non-registered email
            res = app.post("/ZE_2012/login", data={'email' :'abc'})
            assert res.status_code == 401
            # thus, we do not access the pdf creation:
            res = app.get("/ZE_2012/pdf", follow_redirects=True)
            # a few basic asserts; the main test is that nothing raises.
            # we should have an html page:
            assert res.status_code == 401
            assert mock_reportbuild_run.call_count == 0

            # now try to login:
            # with a registered email and wrong permission
            res = app.post("/ZE_2012/login", data={'email' :'*****@*****.**'})
            assert res.status_code == 403
            # thus, we do not access the pdf creation:
            res = app.get("/ZE_2012/pdf", follow_redirects=True)
            # a few basic asserts; the main test is that nothing raises.
            # we should have an html page:
            assert res.status_code == 401
            assert mock_reportbuild_run.call_count == 0

            # now try to login:
            # with a registered email and correct permission (login should succeed)
            res = app.post("/ZE_2012/login", data={'email' :'*****@*****.**'})
            assert res.status_code == 200

            # check that the current user has the fields written
            with models.session(self.app) as session:
                user = session.query(models.User).filter(models.User.email == '*****@*****.**').first()
                assert user.editing_path is not None
                assert user.login_date is not None

            # sleep past the session duration and check that we have been logged
            # out of the session (we set REMEMBER_COOKIE_DURATION = 1 second)
            time.sleep(duration.total_seconds()+1)

            # Note that we need to set up urlread for the arcgis image, because we mocked it
            # (FIXME: we mocked it in gfzreport.templates.network.core.utils.urllib2.urlopen,
            # why is it mocked in the map module?!!!)
            # The signature is:
            # _get_urlopen_sideeffect(geofon_retval=None, others_retval=None):
            # Thus, we set others_retval=URLError, which means that if 'geofon' is not in the url
            # (which is the case for the arcgis query), a URLError is raised.
            # This way, the map is generated with drawcoastlines
            # and the pdf is created. Keep in mind that pdflatex will raise in any case
            self.mock_urlopen.side_effect = _get_urlopen_sideeffect(None, URLError('wat'))
            res = app.get("/ZE_2012/pdf", follow_redirects=True)
            # a few basic asserts; the main test is that nothing raises.
            # we should have an html page. But the login session should have expired!
            assert res.status_code == 401
            assert mock_reportbuild_run.call_count == 0


            # now try to login again:
            # with another registered and authorized email
            usrname = 'user2_ok'
            res = app.post("/ZE_2012/login", data={'email' :'*****@*****.**' % usrname})
            assert res.status_code == 200
            
            # now try to login with another user. This should fail as the first
            # user's session has not expired yet:
            res = app.post("/ZE_2012/login", data={'email' :'*****@*****.**'})
            assert res.status_code == 409  # conflict
            assert ("Conflict: user '%s' is editing the same report (or forgot to log out): "
                    "by default, his/her session will expire in 0:00:" % usrname) in json.loads(res.data)['message']
            # sleep past the session duration so that the first user is logged out
            # (we set REMEMBER_COOKIE_DURATION = 1 second)
            time.sleep(duration.total_seconds()+1)
            # now try to login with the same user. As the first user's session has expired,
            # we should be able to login:
            res = app.post("/ZE_2012/login", data={'email' :'*****@*****.**'})
            assert res.status_code == 200
Code Example #2
    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host_port = req.get_host()
        if not host_port:
            raise URLError('no host given')

        h = http_class(host_port, timeout=req.timeout)
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers[b"Connection"] = b"close"
        # httplib in python 2 needs str() not unicode() for all request
        # parameters
        headers = {
            str(name.title()): str(val)
            for name, val in headers.items()
        }

        if req._tunnel_host:
            set_tunnel = h.set_tunnel if hasattr(
                h, "set_tunnel") else h._set_tunnel
            tunnel_headers = {}
            proxy_auth_hdr = b"Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin server.
                del headers[proxy_auth_hdr]
            set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            h.request(str(req.get_method()), str(req.get_selector()), req.data,
                      headers)
            r = h.getresponse()
        except socket.error as err:  # XXX what error?
            raise URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = create_readline_wrapper(r)

        resp = closeable_response(fp, r.msg, req.get_full_url(), r.status,
                                  r.reason, getattr(r, 'version', None))
        return resp
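
The docstring above describes the file-like return value; below is a minimal caller-side sketch (not part of the handler above) showing how that object is typically consumed and how the URLError raised on socket failures is handled. The URL is a placeholder.

try:
    from urllib.error import URLError          # Python 3
    from urllib.request import urlopen
except ImportError:                            # Python 2
    from urllib2 import URLError, urlopen

url = 'http://example.com/'                    # placeholder URL

try:
    resp = urlopen(url, timeout=10)
except URLError as err:
    # Handlers like do_open above wrap low-level socket errors in URLError,
    # so one except clause covers unreachable hosts, DNS failures, etc.
    print('request failed: %s' % err)
else:
    # The response object exposes the attributes listed in the docstring.
    print(resp.geturl())                       # the (possibly redirected) URL
    print(resp.code)                           # HTTP status code
    print(resp.info())                         # response headers
    resp.close()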
Code Example #3
 def test_hud_data__error(self, mock_urlopen):
     """ Testing hud_data, raise a URLError exception """
     mock_urlopen.side_effect = URLError('URLError exception')
     response = self.cmd.hud_data('services')
     self.assertTrue('URLError exception' in self.cmd.errors)
     self.assertTrue(response == [])
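
For context, a minimal self-contained sketch of the kind of patching that typically supplies a mock like `mock_urlopen` in the test above; the stub function and module aliases are illustrative assumptions, not the original project's code.

import unittest
try:
    from unittest import mock               # Python 3
except ImportError:
    import mock                             # Python 2: pip install mock
try:
    from urllib.error import URLError
    from urllib import request as _urlmod
except ImportError:
    from urllib2 import URLError
    import urllib2 as _urlmod


def hud_data_stub(service):
    """Stand-in for the command under test: fetch a URL, collect errors."""
    errors = []
    try:
        _urlmod.urlopen('http://example.invalid/%s' % service)
    except URLError as err:
        errors.append(str(err))
        return [], errors
    return ['ok'], errors


class HudDataErrorTest(unittest.TestCase):
    def test_urlopen_failure_is_reported(self):
        # Patch urlopen where hud_data_stub looks it up and force a URLError.
        with mock.patch.object(_urlmod, 'urlopen',
                               side_effect=URLError('URLError exception')):
            response, errors = hud_data_stub('services')
        self.assertEqual(response, [])
        self.assertTrue(any('URLError exception' in e for e in errors))


if __name__ == '__main__':
    unittest.main()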
Code Example #4
File: test_campme.py  Project: nurih/ccbot
 def test_invoke_value_error(self, get_data):
     get_data.side_effect = URLError('Nope!')
     actual = get_target().invoke(COMMAND_WITH_VERB, "fake_user")
     assert actual == SlackResponse.text("('Nope!',)")
Code Example #5
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                if not have_appserver:
                    print 'Retrying in %d seconds...' % retry_delay
                time.sleep(retry_delay)
                retry_delay *= 2
            else:
                break
        else:
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                raise URLError("%s\n"
                               "Couldn't reach remote_api handler at %s.\n"
                               "Make sure you've deployed your project and "
                               "installed a remote_api handler in app.yaml." %
                               (e, remote_url))
        logging.info('Now using the remote datastore for "%s" at %s' %
                     (self.remote_app_id, remote_url))

    def flush(self):
        """Helper function to remove the current datastore and re-open the stubs"""
        if self.remote:
            import random, string
            code = ''.join(
                [random.choice(string.ascii_letters) for x in range(4)])
            print '\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
            print '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
            print "Warning! You're about to delete the *production* datastore!"
            print 'Only models defined in your INSTALLED_APPS can be removed!'
Code Example #6
def test_get_target_region_error_response(mocker, capsys):
    _test_get_target_region_error(mocker, capsys, error=URLError('test error'))
Code Example #7
File: test_export.py  Project: pombredanne/lunr
 def raise_exc(*args, **kwargs):
     e = base.NodeError(MockRequest(), URLError("Its gone!"))
     e.code = 404
     e.status = '404 Not Found'
     raise e
Code Example #8
 def require_string(val):
     """Check that provided input is a string"""
     if not (isinstance(val, str) or isinstance(val, unicode)):
         code = web_code('Invalid input')
         raise URLError('code=%s' % code)
Code Example #9
def main():
    ARGS = parse_args(sys.argv[1:])

    logging.basicConfig(level=logging.INFO)
    print parse_reddit_argument(ARGS.reddit)

    TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0
    FINISHED = False

    # Create the specified directory if it doesn't already exist.
    if not pathexists(ARGS.dir):
        mkdir(ARGS.dir)

    # If a regex has been specified, compile the rule (once)
    RE_RULE = None
    if ARGS.regex:
        RE_RULE = re.compile(ARGS.regex)

    # compile reddit comment url to check if url is one of them
    reddit_comment_regex = re.compile(r'.*reddit\.com\/r\/(.*?)\/comments')

    LAST = ARGS.last

    start_time = None
    ITEM = None

    sort_type = ARGS.sort_type
    if sort_type:
        sort_type = sort_type.lower()

    while not FINISHED:
        ITEMS = getitems(ARGS.reddit,
                         multireddit=ARGS.multireddit,
                         previd=LAST,
                         reddit_sort=sort_type)

        # measure time and set the program to wait 4 second between request
        # as per reddit api guidelines
        end_time = time.clock()

        if start_time is not None:
            elapsed_time = end_time - start_time

            if elapsed_time <= 4:  # throttling
                time.sleep(4 - elapsed_time)

        start_time = time.clock()

        if not ITEMS:
            # No more items to process
            break

        for ITEM in ITEMS:
            TOTAL += 1

            # not downloading if url is reddit comment
            if ('reddit.com/r/' + ARGS.reddit + '/comments/' in ITEM['url'] or
                    re.match(reddit_comment_regex, ITEM['url']) is not None):
                print '    Skip:[{}]'.format(ITEM['url'])
                continue

            if ITEM['score'] < ARGS.score:
                if ARGS.verbose:
                    print '    SCORE: {} has score of {} which is lower than required score of {}.'.format(
                        ITEM['id'], ITEM['score'], ARGS.score)

                SKIPPED += 1
                continue
            elif ARGS.sfw and ITEM['over_18']:
                if ARGS.verbose:
                    print '    NSFW: %s is marked as NSFW.' % (ITEM['id'])

                SKIPPED += 1
                continue
            elif ARGS.nsfw and not ITEM['over_18']:
                if ARGS.verbose:
                    print '    Not NSFW, skipping %s' % (ITEM['id'])

                SKIPPED += 1
                continue
            elif ARGS.regex and not re.match(RE_RULE, ITEM['title']):
                if ARGS.verbose:
                    print '    Regex match failed'

                SKIPPED += 1
                continue
            elif ARGS.skipAlbums and 'imgur.com/a/' in ITEM['url']:
                if ARGS.verbose:
                    print '    Album found, skipping %s' % (ITEM['id'])

                SKIPPED += 1
                continue

            if ARGS.title_contain and ARGS.title_contain.lower(
            ) not in ITEM['title'].lower():
                if ARGS.verbose:
                    print '    Title does not contain "{}", skipping {}'.format(
                        ARGS.title_contain, ITEM['id'])

                SKIPPED += 1
                continue

            FILECOUNT = 0
            try:
                URLS = extract_urls(ITEM['url'])
            except Exception:
                _log.exception("Failed to extract urls for %r", ITEM['url'])
                continue
            for URL in URLS:
                try:
                    # Find gfycat if requested
                    if URL.endswith('gif') and ARGS.mirror_gfycat:
                        check = gfycat().check(URL)
                        if check.get("urlKnown"):
                            URL = check.get('webmUrl')

                    # Trim any http query off end of file extension.
                    FILEEXT = pathsplitext(URL)[1]
                    if '?' in FILEEXT:
                        FILEEXT = FILEEXT[:FILEEXT.index('?')]

                    # Only append numbers if more than one file
                    FILENUM = ('_%d' % FILECOUNT if len(URLS) > 1 else '')

                    # create filename based on given input from user
                    if ARGS.filename_format == 'url':
                        FILENAME = '%s%s%s' % (pathsplitext(
                            pathbasename(URL))[0], '', FILEEXT)
                    elif ARGS.filename_format == 'title':
                        FILENAME = '%s%s%s' % (slugify(
                            ITEM['title']), FILENUM, FILEEXT)
                        if len(FILENAME) >= 256:
                            shortened_item_title = slugify(
                                ITEM['title'])[:256 - len(FILENAME)]
                            FILENAME = '%s%s%s' % (shortened_item_title,
                                                   FILENUM, FILEEXT)
                    else:
                        FILENAME = '%s%s%s' % (ITEM['id'], FILENUM, FILEEXT)
                    # join file with directory
                    FILEPATH = pathjoin(ARGS.dir, FILENAME)

                    # Improve debuggability list URL before download too.
                    # url may be wrong so skip that
                    if URL.encode('utf-8') == 'http://':
                        raise URLError('Url is empty')
                    else:
                        text_templ = '    Attempting to download URL[{}] as [{}].'
                        print text_templ.format(URL.encode('utf-8'),
                                                FILENAME.encode('utf-8'))

                    # Download the image
                    try:
                        download_from_url(URL, FILEPATH)
                        # Image downloaded successfully!
                        print '    Successfully downloaded URL [%s] as [%s].' % (
                            URL, FILENAME)
                        DOWNLOADED += 1
                        FILECOUNT += 1

                    except FileExistsException, e:
                        print '    %s' % (e)
                        ERRORS += 1
                        if ARGS.update:
                            print '    Update complete, exiting.'
                            FINISHED = True
                            break
                    except Exception, e:
                        print '    %s' % str(e)
                        ERRORS += 1

                    if ARGS.num and DOWNLOADED >= ARGS.num:
                        FINISHED = True
                        break
                except WrongFileTypeException as ERROR:
                    print '    %s' % (ERROR)
                    _log_wrongtype(url=URL,
                                   target_dir=ARGS.dir,
                                   filecount=FILECOUNT,
                                   _downloaded=DOWNLOADED,
                                   filename=FILENAME)
                    SKIPPED += 1
                except FileExistsException as ERROR:
                    print '    %s' % (ERROR)
                    ERRORS += 1
                    if ARGS.update:
                        print '    Update complete, exiting.'
                        FINISHED = True
                        break
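
The timing code near the top of the loop above implements simple request throttling (at least 4 seconds between listing requests, per the reddit API guidelines). A minimal standalone sketch of that pattern follows; it is not the script's code, and it uses time.time() (wall-clock) rather than the original time.clock().

import time


class Throttle(object):
    """Ensure at least min_interval seconds of wall-clock time between calls."""

    def __init__(self, min_interval=4.0):
        self.min_interval = min_interval
        self._last = None

    def wait(self):
        now = time.time()
        if self._last is not None:
            elapsed = now - self._last
            if elapsed < self.min_interval:
                # Sleep off the remainder of the interval before proceeding.
                time.sleep(self.min_interval - elapsed)
        self._last = time.time()


# Hypothetical usage inside a listing loop like the one above:
# throttle = Throttle(4)
# while not FINISHED:
#     throttle.wait()
#     ITEMS = getitems(...)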
Code Example #10
File: test_export.py  Project: pombredanne/lunr
 def raise_exc(*args, **kwargs):
     e = base.NodeError(MockRequest(), URLError("something bad"))
     e.code = 400
     e.status = '400 something bad'
     raise e
Code Example #11
 def raise_exc(*args, **kwargs):
     raise URLError("something bad")
Code Example #12
 def __init__(self, request, timeout=None):
     raise URLError('blah')
Code Example #13
 def raise_exc(*args, **kwargs):
     e = base.NodeError(MockRequest(), URLError("fake 404"))
     e.code = 404
     raise e
Code Example #14
 def raise_exc(self, node, method, path, **kwargs):
     node_request_path.append(path)
     raise base.NodeError(MockRequest(), URLError("something bad"))
Code Example #15
File: oai.py  Project: pombredanne/sync-oai
                        print "503-error waiting the suggested %s seconds" % retry
                        for x in range(retry):
                            time.sleep(1)
                            print(x + 1)
                else:
                    raise IOError

            except AttributeError, e:
                print "Attribute Error (xml?) %s" % e

            except ParseError, e:
                print "ParseError %s" % e

            except URLError, e:
                raise URLError(
                    "While opening URL: %s with parameters %s an error turned up %s"
                    % (self.endpoint, params, e))

    def buildHeader(self, header_node):
        """extract header information of header_node into Header object"""
        identifier = None
        datestamp = None
        isdeleted = False
        for children in header_node:
            if children.tag == '{' + OAI_NS + '}identifier':
                identifier = children.text
            elif children.tag == '{' + OAI_NS + '}datestamp':
                if re.match(r"\d\d\d\d\-\d\d\-\d\d$", children.text):
                    children.text += "T00:00:00Z"
                if re.match(
                        r"\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d(:\d\d)?(Z|[+-]\d\d:\d\d)$",
Code Example #16
            rpc_server_factory=rpc_server_factory)
        retry_delay = 1
        while retry_delay <= 16:
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                if not have_appserver:
                    logging.info("Retrying in %d seconds..." % retry_delay)
                time.sleep(retry_delay)
                retry_delay *= 2
            else:
                break
        else:
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                raise URLError("%s\n"
                               "Couldn't reach remote_api handler at %s.\n"
                               "Make sure you've deployed your project and "
                               "installed a remote_api handler in app.yaml. "
                               "Note that login is only supported for "
                               "Google Accounts. Make sure you've configured "
                               "the correct authentication method in the "
                               "App Engine Dashboard." % (e, remote_url))
        logging.info("Now using the remote datastore for '%s' at %s." %
                     (connection.remote_app_id, remote_url))
        self.active_stubs = 'remote'


stub_manager = StubManager()
Code Example #17
 def get_readable_fileobj_mockreturn(filename, **kwargs):
     e = URLError('timeout')
     e.reason = socket.timeout()
     raise e
     yield True
Code Example #18
File: manuf.py  Project: trodix/sysbus
    def update(self,
               manuf_url=None,
               wfa_url=None,
               manuf_name=None,
               refresh=True):
        """Update the Wireshark OUI database to the latest version.

        Args:
            manuf_url (str): URL pointing to OUI database. Defaults to database located at
                code.wireshark.org.
            wfa_url (str): URL pointing to the Wi-Fi Alliance (WFA) listing that is appended
                to the database. Defaults to self.WFA_URL.
            manuf_name (str): Location to store the new OUI database. Defaults to "manuf" in the
                same directory.
            refresh (bool): Refresh the database once updated. Defaults to True. Uses database
                stored at manuf_name.

        Raises:
            URLError: If the download fails

        """
        if not manuf_url:
            manuf_url = self.MANUF_URL
        if not manuf_name:
            manuf_name = self._manuf_name

        # Retrieve the new database
        try:
            response = urlopen(manuf_url)
        except URLError:
            raise URLError("Failed downloading OUI database")

        # Parse the response
        if response.code == 200:
            with open(manuf_name, "wb") as write_file:
                write_file.write(response.read())
            if refresh:
                self.refresh(manuf_name)
        else:
            err = "{0} {1}".format(response.code, response.msg)
            raise URLError("Failed downloading database: {0}".format(err))

        response.close()
        if not wfa_url:
            wfa_url = self.WFA_URL

        # Append WFA to new database
        try:
            response = urlopen(wfa_url)
        except URLError:
            raise URLError("Failed downloading WFA database")

        # Parse the response
        if response.code == 200:
            with open(manuf_name, "ab") as write_file:
                write_file.write(response.read())
            if refresh:
                self.refresh(manuf_name)
        else:
            err = "{0} {1}".format(response.code, response.msg)
            raise URLError("Failed downloading database: {0}".format(err))

        response.close()
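
A short usage sketch for the method above, assuming the surrounding class is the manuf library's MacParser; the import path and class name are assumptions, and any failed download surfaces as a URLError per the docstring.

try:
    from urllib.error import URLError   # Python 3
except ImportError:
    from urllib2 import URLError        # Python 2

from manuf import manuf                 # assumed PyPI 'manuf' package layout

parser = manuf.MacParser()
try:
    # Downloads the OUI and WFA databases and, with refresh=True (the
    # default), reloads the in-memory table from the freshly written file.
    parser.update()
except URLError as err:
    print('OUI database update failed: %s' % err)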
Code Example #19
def test_get_geo_ip_url_error(Client_mock):
    Client_mock.get_soap_client_service.side_effect = URLError('')
    result = get_geo_ip('1.2.3.4')
    Client_mock.get_soap_client_service.assert_called_once()
    assert None == result
Code Example #20
    def _request(cls,
                 url,
                 post_data=None,
                 timeout=REQUEST_TIMEOUT,
                 attempts=REQUEST_ATTEMPTS):
        # change fb__explicitly_shared to fb:explicitly_shared
        if post_data:
            post_data = dict(
                (k.replace('__', ':'), v) for k, v in post_data.items())

        logger.info('requesting url %s with post data %s', url, post_data)
        post_request = (post_data is not None or 'method=post' in url)

        if post_request and facebook_settings.FACEBOOK_READ_ONLY:
            logger.info('running in readonly mode')
            response = dict(id=123456789, setting_read_only=True)
            return response

        # nicely identify ourselves before sending the request
        opener = build_opener()
        opener.addheaders = [('User-agent', 'Open Facebook Python')]

        # get the statsd path to track response times with
        path = urlparse(url).path
        statsd_path = path.replace('.', '_')

        # give it a few shots, connection is buggy at times
        timeout_mp = 0
        while attempts:
            # gradually increase the timeout upon failure
            timeout_mp += 1
            extended_timeout = timeout * timeout_mp
            response_file = None
            encoded_params = encode_params(post_data) if post_data else None
            post_string = (urlencode(encoded_params) if post_data else None)
            try:
                start_statsd('facebook.%s' % statsd_path)

                try:
                    response_file = opener.open(url,
                                                post_string,
                                                timeout=extended_timeout)
                    response = response_file.read().decode('utf8')
                except (HTTPError, ) as e:
                    response_file = e
                    response = response_file.read().decode('utf8')
                    # Facebook sends error codes for many of their flows
                    # we still want the json to allow for proper handling
                    msg_format = 'FB request, error type %s, code %s'
                    logger.warn(msg_format, type(e), getattr(e, 'code', None))
                    # detect if its a server or application error
                    server_error = cls.is_server_error(e, response)
                    if server_error:
                        # trigger a retry
                        raise URLError('Facebook is down %s' % response)
                break
            except (HTTPError, URLError, ssl.SSLError) as e:
                # These are often temporary errors, so we will retry before
                # failing
                error_format = 'Facebook encountered a timeout (%ss) or error %s'
                logger.warn(error_format, extended_timeout, unicode(e))
                attempts -= 1
                if not attempts:
                    # if we have no more attempts actually raise the error
                    error_instance = facebook_exceptions.convert_unreachable_exception(
                        e)
                    error_msg = 'Facebook request failed after several retries, raising error %s'
                    logger.warn(error_msg, error_instance)
                    raise error_instance
            finally:
                if response_file:
                    response_file.close()
                stop_statsd('facebook.%s' % statsd_path)

        # The Facebook response is either
        # Valid json
        # A string which is a querydict (a=b&c=d...etc)
        # An html page stating FB is having trouble (but that shouldn't reach
        # this part of the code)
        try:
            parsed_response = json.loads(response)
            logger.info('facebook send response %s' % parsed_response)
        except Exception as e:
            # using exception because we need to support multiple json libs :S
            parsed_response = QueryDict(response, True)
            logger.info('facebook send response %s' % parsed_response)

        if parsed_response and isinstance(parsed_response, dict):
            # of course we have two different syntaxes
            if parsed_response.get('error'):
                cls.raise_error(parsed_response['error']['type'],
                                parsed_response['error']['message'],
                                parsed_response['error'].get('code'))
            elif parsed_response.get('error_code'):
                cls.raise_error(parsed_response['error_code'],
                                parsed_response['error_msg'])

        return parsed_response
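
The retry loop above mixes Facebook-specific handling with a more general pattern: retry a flaky call a fixed number of times, stretching the timeout on each attempt, and re-raise only once the attempts are exhausted. A minimal, self-contained sketch of that pattern (generic URL fetching, not Open Facebook's API):

try:
    from urllib.error import URLError          # Python 3
    from urllib.request import urlopen
except ImportError:                            # Python 2
    from urllib2 import URLError, urlopen


def fetch_with_retries(url, timeout=8, attempts=3):
    """Retry a URL a few times, stretching the timeout on each failure."""
    timeout_mp = 0
    while attempts:
        # gradually increase the timeout upon failure
        timeout_mp += 1
        extended_timeout = timeout * timeout_mp
        try:
            return urlopen(url, timeout=extended_timeout).read()
        except URLError as err:
            attempts -= 1
            if not attempts:
                # Out of attempts: surface the last error to the caller.
                raise
            print('retrying after %s (next timeout: %ss)'
                  % (err, timeout * (timeout_mp + 1)))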
Code Example #21
 def from_format(cls, format):
     format = format.lower()
     if format not in cls.VALID_FORMATS:
         raise URLError("Unsupported export format: %s!" % format)
     return cls(format, **cls.FORMAT_DICT[format])
Code Example #22
def _test_get_target_region_from_dns_format(mocker, config):
    mocker.patch('mount_efs.get_aws_ec2_metadata_token', return_value=None)
    mocker.patch('mount_efs.urlopen', side_effect=URLError('test error'))
    assert TARGET_REGION == mount_efs.get_target_region(config)
Code Example #23
def download_daily_data(download_dir, hemisphere='N'):
    '''
    Required:   <download_dir>    directory to store the data files
    Optional:   <hemisphere>      one of 'N'/'North' or 'S'/'South', default is 'N'
    '''
    today = datetime.today()
    if hemisphere.upper() in ['SOUTH', 'S']:
        url = 'ftp://sidads.colorado.edu/DATASETS/NOAA/G02135/south/daily/data/'
    else:
        url = 'ftp://sidads.colorado.edu/DATASETS/NOAA/G02135/north/daily/data/'
    try:
        ftp_listing = urlopen(url).read().splitlines()
    except URLError:
        raise URLError("Cannot connect to NSIDC FTP url: {}".format(url))
    data_files = [f.split()[-1] for f in ftp_listing[:4] if f.endswith('.csv')]
    #  climatology file (.csv)
    try:
        climatology = [
            url + "/" + d for d in data_files if 'climatology' in d
        ][0]
    except IndexError:
        raise Exception(
            "Cannot locate climatology dataset at FTP: {}".format(url))
    target_climo = os.path.join(download_dir, os.path.basename(climatology))
    you_gotta_download = False
    if not os.path.isfile(target_climo):
        you_gotta_download = True
    else:
        # Re-download the file if it is >=12 hours older relative to "today" at start of script execution
        modtime = datetime.fromtimestamp(os.path.getmtime(target_climo))
        if modtime <= today - timedelta(0.5):
            you_gotta_download = True
    if you_gotta_download:  # then go get it!
        print "Retrieving {}-hemisphere climatology data from NSIDC FTP...".format(
            hemisphere)
        remote_data = urlopen(climatology)
        with open(target_climo, 'wb') as local_data:
            copyfileobj(remote_data, local_data)
        remote_data.close()
    else:
        print "Existing local climatology files for {}-hemisphere are fresh enough...".format(
            hemisphere)
    # daily data file (.csv)
    try:
        daily = [url + "/" + d for d in data_files if 'daily' in d][0]
    except IndexError:
        raise Exception("Cannot locate daily dataset at FTP: {}".format(url))
    target_daily = os.path.join(download_dir, os.path.basename(daily))
    you_gotta_download = False
    if not os.path.isfile(target_daily):
        you_gotta_download = True
    else:
        # Re-download the file if it is >=12 hours older relative to current day at start of script execution
        modtime = datetime.fromtimestamp(os.path.getmtime(target_daily))
        if modtime <= (today - timedelta(0.5)):
            you_gotta_download = True
    if you_gotta_download:  # then download!
        print "Retrieving {}-hemisphere daily data from NSIDC FTP...".format(
            hemisphere)
        remote_data = urlopen(daily)
        with open(target_daily, 'wb') as local_data:
            copyfileobj(remote_data, local_data)
        remote_data.close()
    else:
        print "Existing local daily files for {}-hemisphere are fresh enough...".format(
            hemisphere)
    # return references to data files for later use in plotting
    return target_daily, target_climo
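
A short usage sketch for the function above; the download directory is a placeholder, and the URLError raised on FTP connection failure is handled by the caller.

import os

try:
    from urllib.error import URLError   # Python 3
except ImportError:
    from urllib2 import URLError        # Python 2

download_dir = os.path.expanduser('~/nsidc_data')   # placeholder location
if not os.path.isdir(download_dir):
    os.makedirs(download_dir)

try:
    daily_csv, climo_csv = download_daily_data(download_dir, hemisphere='S')
except URLError as err:
    # Raised above when the NSIDC FTP server cannot be reached.
    print('download failed: %s' % err)
else:
    print('daily data:   %s' % daily_csv)
    print('climatology:  %s' % climo_csv)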
Code Example #24
File: NCBI.py  Project: melakbet/MAVR
    def get_taxa_genomes_summary(self, taxa, email, output_directory, output_prefix,
                                 max_ids_per_query=8000, max_download_attempts=500,
                                 min_scaffold_n50=None, min_contig_n50=None, max_scaffold_l50=None,
                                 max_contig_l50=None, max_contig_count=None, max_scaffold_count=None,
                                 max_chromosome_count=None, min_chromosome_count=None, max_unlocalized_scaffolds=None,
                                 max_unplaced_scaffolds=None, max_total_length=None, min_total_length=None,
                                 max_ungapped_length=None, min_ungapped_length=None,
                                 no_ambiguous_species=True):
        Entrez.email = email
        taxa_list = taxa if isinstance(taxa, Iterable) else [taxa]

        all_files_dir = "%s%s/" % (self.check_path(output_directory), "all")
        nonambiguous_species_all_dir = "%snonambiguous_species_all/" % self.check_path(output_directory)
        ambiguous_species_all_dir = "%s%s/" % (self.check_path(output_directory), "ambiguous_species_all")
        chromosome_lvl_dir = "%s%s/" % (self.check_path(output_directory), "chromosome_lvl")
        non_chromosome_lvl_dir = "%s%s/" % (self.check_path(output_directory), "nonchromosome_lvl")

        filtered_by_integrity_dir = "%s%s/" % (self.check_path(output_directory), "passed_integrity_filters")
        filtered_out_by_integrity_dir = "%s%s/" % (self.check_path(output_directory), "not_passed_integrity_filters")

        stat_dir = "%s%s/" % (self.check_path(output_directory), "stat")
        taxa_stat_dir = "%s%s/" % (self.check_path(output_directory), "taxa_stat")
        for subdir in (all_files_dir, chromosome_lvl_dir, non_chromosome_lvl_dir, stat_dir,
                       taxa_stat_dir, nonambiguous_species_all_dir, ambiguous_species_all_dir):
            self.save_mkdir(subdir)

        filter_by_integrity = min_scaffold_n50 or min_contig_n50 or max_scaffold_l50 or max_contig_l50 \
                              or max_contig_count or max_scaffold_count or max_chromosome_count \
                              or min_chromosome_count or max_unlocalized_scaffolds \
                              or max_unplaced_scaffolds or max_total_length or min_total_length \
                              or max_ungapped_length or min_ungapped_length

        if filter_by_integrity:
            for subdir in (filtered_by_integrity_dir, filtered_out_by_integrity_dir):
                self.save_mkdir(subdir)

        for taxon in taxa_list:
            search_term = "%s[Orgn]" % taxon

            attempt_counter = 1
            while True:
                try:
                    summary = Entrez.read(Entrez.esearch(db="genome", term=search_term, retmax=10000, retmode="xml"))
                    break
                except URLError:
                    if attempt_counter > max_download_attempts:
                        raise URLError("Network problems. Maximum attempt number is exceeded")
                    print "URLError. Retrying... Attempt %i" % attempt_counter
                    attempt_counter += 1

            print "Found %s species" % summary["Count"]
            #print summary

            taxon_stat_file = "%s/%s.stat" % (taxa_stat_dir, taxon.replace(" ", "_"))
            taxon_stat_dict = TwoLvlDict()

            for species_id in summary["IdList"]: #[167] :
                print "Handling species id %s " % species_id

                species_stat_file = "%s/%s.stat" % (stat_dir, species_id)
                species_stat_dict = TwoLvlDict()
                species_stat_dict[species_id] = OrderedDict()

                taxon_stat_dict[species_id] = OrderedDict()

                for stat in "all", "chromosome_lvl", "non_chromosome_lvl":
                    species_stat_dict[species_id][stat] = 0
                    taxon_stat_dict[species_id][stat] = 0
                #species_summary = Entrez.read(Entrez.esummary(db="genome", id=species_id, retmax=10000, retmode="xml"))
                #print species_summary

                # get assemblies linked with genome of species

                attempt_counter = 1
                while True:
                    try:
                        assembly_links = Entrez.read(Entrez.elink(dbfrom="genome", id=species_id, retmode="xml",
                                                                  retmax=10000, linkname="genome_assembly"))
                        break
                    except URLError:
                        if attempt_counter > max_download_attempts:
                            raise URLError("Network problems. Maximum attempt number is exceeded")
                        print "URLError. Retrying... Attempt %i" % attempt_counter
                        attempt_counter += 1

                assembly_number = len(assembly_links)
                #print links
                #print links[0]["LinkSetDb"][0]["Link"]
                if assembly_links:
                    if "LinkSetDb" in assembly_links[0]:
                        if assembly_links[0]["LinkSetDb"]:
                            if "Link" in assembly_links[0]["LinkSetDb"][0]:
                                assembly_ids = [id_dict["Id"] for id_dict in assembly_links[0]["LinkSetDb"][0]["Link"]]
                            else:
                                continue
                        else:
                            continue
                    else:
                        continue
                else:
                    continue
                number_of_ids = len(assembly_ids)

                print "\tFound %i assemblies" % number_of_ids

                id_group_edges = np.arange(0, number_of_ids+1, max_ids_per_query)

                if id_group_edges[-1] != number_of_ids:
                    id_group_edges = np.append(id_group_edges, number_of_ids)

                number_of_id_groups = len(id_group_edges) - 1

                #print len(assembly_links[0]["LinkSetDb"][0]["Link"])
                #print assembly_ids
                #print len(assembly_ids)
                #assembly_dict = TwoLvlDict()
                #assemblies_with_ambiguous_taxonomies = SynDict()
                #summaries = Entrez.read(Entrez.esummary(db="assembly", id=",".join(assembly_ids), retmode="xml"))

                summary_list = None
                for i in range(0, number_of_id_groups):
                    print "\tDownloading summary about assemblies %i - %i" % (id_group_edges[i]+1, id_group_edges[i+1])
                    #print len(assembly_ids[id_group_edges[i]:id_group_edges[i+1]])
                    summaries = Entrez.read(Entrez.esummary(db="assembly",
                                                            id=",".join(assembly_ids[id_group_edges[i]:id_group_edges[i+1]]),
                                                            retmode="xml"), validate=False)
                    tmp_summary_list = AssemblySummaryList(entrez_summary_biopython=summaries)
                    summary_list = (summary_list + tmp_summary_list) if summary_list else tmp_summary_list

                print "\tDownloaded %i" % len(summary_list)

                if len(summary_list) != number_of_ids:
                    print "\tWARNING:Not all assemblies were downloaded"
                    """
                    print "\tFollowing assemblies were not downloaded(ids):%s" % ",".join(set())
                    """

                if summary_list:
                    species_stat_dict[species_id]["all"] = len(summary_list)
                    taxon_stat_dict[species_id]["all"] = len(summary_list)
                    output_file = "%s%s.genome.summary" % ((output_prefix + ".") if output_prefix else "", species_id)
                                                           #summary_list[0]['SpeciesName'].replace(" ", "_"))

                    all_output_file = "%s/%s" % (all_files_dir, output_file)
                    chromosome_lvl_output_file = "%s/%s" % (chromosome_lvl_dir, output_file)
                    non_chromosome_lvl_output_file = "%s/%s" % (non_chromosome_lvl_dir, output_file)
                    nonambiguous_species_output_file = "%s/%s" % (nonambiguous_species_all_dir, output_file)
                    ambiguous_species_output_file = "%s/%s" % (ambiguous_species_all_dir, output_file)
                    chromosome_lvl_summary_list, non_chromosome_lvl_summary_list = summary_list.filter_non_chrom_level_genomes()
                    filtered_by_integrity_file = "%s/%s" % (filtered_by_integrity_dir, output_file)
                    filtered_out_by_integrity_file = "%s/%s" % (filtered_out_by_integrity_dir, output_file)

                    species_stat_dict[species_id]["chromosome_lvl"] = len(chromosome_lvl_summary_list)
                    taxon_stat_dict[species_id]["chromosome_lvl"] = len(chromosome_lvl_summary_list)
                    species_stat_dict[species_id]["non_chromosome_lvl"] = len(non_chromosome_lvl_summary_list)
                    taxon_stat_dict[species_id]["non_chromosome_lvl"] = len(non_chromosome_lvl_summary_list)

                    print("\tChromosome level assemblies %i" % species_stat_dict[species_id]["chromosome_lvl"])
                    print("\tNon chromosome level assemblies %i" % species_stat_dict[species_id]["non_chromosome_lvl"])

                    if chromosome_lvl_summary_list:
                        chromosome_lvl_summary_list.write(chromosome_lvl_output_file)

                    if non_chromosome_lvl_summary_list:
                        non_chromosome_lvl_summary_list.write(non_chromosome_lvl_output_file)

                    nonambiguous_species_summary_list, ambiguous_species_summary_list = summary_list.filter_ambiguous_species()
                    #print(len(nonambiguous_species_summary_list), len(ambiguous_species_summary_list))
                    species_stat_dict[species_id]["nonambiguous_species"] = len(nonambiguous_species_summary_list)
                    species_stat_dict[species_id]["ambiguous_species"] = len(ambiguous_species_summary_list)
                    print "\tAmbiguous species %i" % species_stat_dict[species_id]["ambiguous_species"]
                    if nonambiguous_species_summary_list:
                        nonambiguous_species_summary_list.write(nonambiguous_species_output_file)
                    if ambiguous_species_summary_list:
                        ambiguous_species_summary_list.write(ambiguous_species_output_file)

                    summary_list.write(all_output_file)

                    if filter_by_integrity:
                        filtered_by_integrity, filtered_out_by_integrity = summary_list.filter_by_integrity(min_scaffold_n50=min_scaffold_n50,
                                                                                                            min_contig_n50=min_contig_n50,
                                                                                                            max_scaffold_l50=max_scaffold_l50,
                                                                                                            max_contig_l50=max_contig_l50,
                                                                                                            max_contig_count=max_contig_count,
                                                                                                            max_scaffold_count=max_scaffold_count,
                                                                                                            max_chromosome_count=max_chromosome_count,
                                                                                                            min_chromosome_count=min_chromosome_count,
                                                                                                            max_unlocalized_scaffolds=max_unlocalized_scaffolds,
                                                                                                            max_unplaced_scaffolds=max_unplaced_scaffolds,
                                                                                                            max_total_length=max_total_length,
                                                                                                            min_total_length=min_total_length,
                                                                                                            max_ungapped_length=max_ungapped_length,
                                                                                                            min_ungapped_length=min_ungapped_length,
                                                                                                            no_ambiguous_species=no_ambiguous_species)
                        species_stat_dict[species_id]["filtered_by_integrity"] = len(filtered_by_integrity)
                        species_stat_dict[species_id]["filtered_out_by_integrity"] = len(filtered_out_by_integrity)
                        if filtered_by_integrity:
                            filtered_by_integrity.write(filtered_by_integrity_file)
                        if filtered_out_by_integrity:
                            filtered_out_by_integrity.write(filtered_out_by_integrity_file)
                        print "\tPassed integrity filters %i" % species_stat_dict[species_id]["filtered_by_integrity"]
                species_stat_dict.write(species_stat_file)

                print "\n\n"

            taxon_stat_dict.write(taxon_stat_file)

            """
Code Example #25
File: urllib2_file.py  Project: sgricci/digsby
def do_request_(self, request):
    host = request.get_host()
    if not host:
        raise URLError('no host given')

    data = request.get_data()
    v_files = []
    v_vars = []
    if request.has_data() and not isinstance(data, str):  #POST
        if hasattr(data, 'items'):
            data = data.items()
        else:
            try:
                if len(data) and not isinstance(data[0], tuple):
                    raise TypeError
            except TypeError:
                _ty, _va, tb = sys.exc_info()
                try:
                    raise TypeError, "not a valid non-string sequence or mapping object: %r" % type(
                        data), tb
                finally:
                    del tb
        for (k, v) in data:
            if hasattr(v, 'read'):
                v_files.append((k, v))
            else:
                v_vars.append((k, v))
        boundary = mimetools.choose_boundary()
        request.boundary = boundary
        request.v_files = v_files
        request.v_vars = v_vars
    # no file ? convert to string
    if len(v_vars) > 0 and len(v_files) == 0:
        request.data = data = urllib.urlencode(v_vars)
        v_files[:] = []
        v_vars[:] = []

    if request.has_data():
        if not 'Content-type' in request.headers:
            if len(v_files) > 0:
                l = send_data(v_vars, v_files, boundary)
                request.add_unredirected_header(
                    'Content-Type',
                    'multipart/form-data; boundary=%s' % boundary)
                request.add_unredirected_header('Content-length', str(l))
            else:
                request.add_unredirected_header(
                    'Content-type', 'application/x-www-form-urlencoded')
                if not 'Content-length' in request.headers:
                    request.add_unredirected_header('Content-length',
                                                    '%d' % len(data))

    _scheme, sel = splittype(request.get_selector())
    sel_host, _sel_path = splithost(sel)
    if not request.has_header('Host'):
        request.add_unredirected_header('Host', sel_host or host)
    for name, value in self.parent.addheaders:
        name = name.capitalize()
        if not request.has_header(name):
            request.add_unredirected_header(name, value)

    return request
Code Example #26
 def __call__(self, url):
     package = url.split('/')[-2]
     try:
         return StringIO(json.dumps(self.results[package]))
     except KeyError:
         raise URLError('404')
Code Example #27
 def mockedConnectionTimeOut(self, request):
   raise URLError('Connection timed out')
Code Example #28
 def unknown_open(self, req):
     type = req.get_type()
     raise URLError('unknown url type: %s' % type)
Code Example #29
 def mockedGetAddrInfoFailed(self, request):
   raise URLError('Getaddrinfo failed')
Code Example #30
File: test_sdss.py  Project: anitameh/astroquery
 def get_readable_fileobj_mockreturn(filename, **kwargs):
     e = URLError('timeout')
     e.reason = socket.timeout()
     raise e
     yield True
Code Example #31
 def raise_exc(*args, **kwargs):
     raise base.NodeError(MockRequest(), URLError("something bad"))