def test_report_remember_cookie_duration(self, mock_reportbuild_run):
    duration = self.app.config['PERMANENT_SESSION_LIFETIME']
    # test the page pdf. We are unauthorized, so this should give us an error:
    with self.app.test_request_context():
        app = self.app.test_client()
        # If we add routes with the slash at the end, we should
        # add follow_redirect=True to app.get. See
        # http://flask.pocoo.org/docs/0.11/quickstart/#routing
        res = app.get("/ZE_2012/pdf", follow_redirects=True)
        # few basic asserts, the main test is that nothing raises;
        # we should have an html page:
        assert res.status_code == 401
        assert mock_reportbuild_run.call_count == 0

        # now try to login with a non-registered email:
        res = app.post("/ZE_2012/login", data={'email': 'abc'})
        assert res.status_code == 401
        # thus, we do not access the pdf creation:
        res = app.get("/ZE_2012/pdf", follow_redirects=True)
        # few basic asserts, the main test is that nothing raises;
        # we should have an html page:
        assert res.status_code == 401
        assert mock_reportbuild_run.call_count == 0

        # now try to login with a registered email and wrong permission:
        res = app.post("/ZE_2012/login", data={'email': '*****@*****.**'})
        assert res.status_code == 403
        # thus, we do not access the pdf creation:
        res = app.get("/ZE_2012/pdf", follow_redirects=True)
        # few basic asserts, the main test is that nothing raises;
        # we should have an html page:
        assert res.status_code == 401
        assert mock_reportbuild_run.call_count == 0

        # now try to login with another non-registered email:
        res = app.post("/ZE_2012/login", data={'email': '*****@*****.**'})
        assert res.status_code == 200
        # check that the current user has the fields written
        with models.session(self.app) as session:
            user = session.query(models.User).\
                filter(models.User.email == '*****@*****.**').first()
            assert user.editing_path is not None
            assert user.login_date is not None

        # sleep and check that we have been logged out from the session
        # (we set REMEMBER_COOKIE_DURATION = 1 second)
        time.sleep(duration.total_seconds() + 1)

        # Note that we need to set up urlread for the arcgis image, because we mocked it
        # (FIXME: we mocked gfzreport.templates.network.core.utils.urllib2.urlopen,
        # why is it mocked in the map module?!!!)
        # The signature is:
        # _get_urlopen_sideeffect(geofon_retval=None, others_retval=None)
        # Thus, we set others_retval=URLError, which means that if 'geofon' is not in the url
        # (which is the case for the arcgis query) a URLError is raised.
        # This way, the map is generated with drawcoastlines
        # and the pdf is created. Keep in mind that pdflatex will raise in any case
        self.mock_urlopen.side_effect = _get_urlopen_sideeffect(None, URLError('wat'))
        res = app.get("/ZE_2012/pdf", follow_redirects=True)
        # few basic asserts, the main test is that nothing raises;
        # we should have an html page. But the login session should have expired!
        assert res.status_code == 401
        assert mock_reportbuild_run.call_count == 0

        # now try to login again with another non-registered email:
        usrname = 'user2_ok'
        res = app.post("/ZE_2012/login", data={'email': '*****@*****.**' % usrname})
        assert res.status_code == 200

        # now try to login with another user. This should fail as we are still in
        # the session duration:
        res = app.post("/ZE_2012/login", data={'email': '*****@*****.**'})
        assert res.status_code == 409  # conflict
        assert ("Conflict: user '%s' is editing the same report (or forgot to log out): "
                "by default, his/her session will expire in 0:00:" % usrname) \
            in json.loads(res.data)['message']

        # sleep and check that we have been logged out from the session
        # (we set REMEMBER_COOKIE_DURATION = 1 second)
        time.sleep(duration.total_seconds() + 1)
        # now try to login with the same user. As the first user's session expired,
        # we should be able to login:
        res = app.post("/ZE_2012/login", data={'email': '*****@*****.**'})
        assert res.status_code == 200
def do_open(self, http_class, req):
    """Return an addinfourl object for the request, using http_class.

    http_class must implement the HTTPConnection API from httplib.
    The addinfourl return value is a file-like object.  It also
    has methods and attributes including:
        - info(): return a mimetools.Message object for the headers
        - geturl(): return the original request URL
        - code: HTTP status code
    """
    host_port = req.get_host()
    if not host_port:
        raise URLError('no host given')

    h = http_class(host_port, timeout=req.timeout)
    h.set_debuglevel(self._debuglevel)

    headers = dict(req.headers)
    headers.update(req.unredirected_hdrs)
    # We want to make an HTTP/1.1 request, but the addinfourl
    # class isn't prepared to deal with a persistent connection.
    # It will try to read all remaining data from the socket,
    # which will block while the server waits for the next request.
    # So make sure the connection gets closed after the (only)
    # request.
    headers[b"Connection"] = b"close"
    # httplib in python 2 needs str() not unicode() for all request
    # parameters
    headers = {
        str(name.title()): str(val)
        for name, val in headers.items()
    }

    if req._tunnel_host:
        set_tunnel = h.set_tunnel if hasattr(h, "set_tunnel") else h._set_tunnel
        tunnel_headers = {}
        proxy_auth_hdr = b"Proxy-Authorization"
        if proxy_auth_hdr in headers:
            tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
            # Proxy-Authorization should not be sent to origin server.
            del headers[proxy_auth_hdr]
        set_tunnel(req._tunnel_host, headers=tunnel_headers)

    try:
        h.request(str(req.get_method()), str(req.get_selector()),
                  req.data, headers)
        r = h.getresponse()
    except socket.error as err:  # XXX what error?
        raise URLError(err)

    # Pick apart the HTTPResponse object to get the addinfourl
    # object initialized properly.

    # Wrap the HTTPResponse object in socket's file object adapter
    # for Windows.  That adapter calls recv(), so delegate recv()
    # to read().  This weird wrapping allows the returned object to
    # have readline() and readlines() methods.
    # XXX It might be better to extract the read buffering code
    # out of socket._fileobject() and into a base class.
    r.recv = r.read
    fp = create_readline_wrapper(r)

    resp = closeable_response(fp, r.msg, req.get_full_url(),
                              r.status, r.reason, getattr(r, 'version', None))
    return resp
def test_hud_data__error(self, mock_urlopen):
    """Testing hud_data: raise a URLError exception."""
    mock_urlopen.side_effect = URLError('URLError exception')
    response = self.cmd.hud_data('services')
    self.assertTrue('URLError exception' in self.cmd.errors)
    self.assertTrue(response == [])
def test_invoke_value_error(self, get_data):
    get_data.side_effect = URLError('Nope!')
    actual = get_target().invoke(COMMAND_WITH_VERB, "fake_user")
    assert actual == SlackResponse.text("('Nope!',)")
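# A minimal, self-contained sketch (not from the project above) of the mocking
# pattern these tests rely on: inject a callable whose side_effect raises
# URLError and assert that the caller degrades gracefully. All names below
# (fetch_status, the example URL) are made up for illustration only.
from unittest import mock
from urllib.error import URLError

def fetch_status(url, opener):
    """Return the body of `url`, or None if the network call fails."""
    try:
        return opener(url).read()
    except URLError:
        return None

def test_fetch_status_handles_url_error():
    opener = mock.Mock(side_effect=URLError('boom'))
    assert fetch_status('http://example.com', opener) is None
    opener.assert_called_once_with('http://example.com')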
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                if not have_appserver:
                    print 'Retrying in %d seconds...' % retry_delay
                    time.sleep(retry_delay)
                    retry_delay *= 2
            else:
                break
        else:
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                raise URLError("%s\n"
                               "Couldn't reach remote_api handler at %s.\n"
                               "Make sure you've deployed your project and "
                               "installed a remote_api handler in app.yaml."
                               % (e, remote_url))
        logging.info('Now using the remote datastore for "%s" at %s'
                     % (self.remote_app_id, remote_url))

    def flush(self):
        """Helper function to remove the current datastore and re-open the stubs"""
        if self.remote:
            import random, string
            code = ''.join(
                [random.choice(string.ascii_letters) for x in range(4)])
            print '\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
            print '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
            print "Warning! You're about to delete the *production* datastore!"
            print 'Only models defined in your INSTALLED_APPS can be removed!'
def test_get_target_region_error_response(mocker, capsys):
    _test_get_target_region_error(mocker, capsys, error=URLError('test error'))
def raise_exc(*args, **kwargs): e = base.NodeError(MockRequest(), URLError("Its gone!")) e.code = 404 e.status = '404 Not Found' raise e
def require_string(val):
    """Check that provided input is a string"""
    if not (isinstance(val, str) or isinstance(val, unicode)):
        code = web_code('Invalid input')
        raise URLError('code=%s' % code)
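# A hedged usage sketch for require_string above. The surrounding module is
# Python 2 (it references `unicode`), and `web_code` is assumed to map a
# human-readable message to a numeric error code; both come from context not
# shown here.
try:
    require_string(12345)            # not a str/unicode value, so it is rejected
except URLError as exc:
    print(exc.reason)                # prints something like "code=<numeric code>"

require_string(u"this is fine")      # a real string passes silently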
def main():
    ARGS = parse_args(sys.argv[1:])

    logging.basicConfig(level=logging.INFO)

    print parse_reddit_argument(ARGS.reddit)

    TOTAL = DOWNLOADED = ERRORS = SKIPPED = FAILED = 0
    FINISHED = False

    # Create the specified directory if it doesn't already exist.
    if not pathexists(ARGS.dir):
        mkdir(ARGS.dir)

    # If a regex has been specified, compile the rule (once).
    RE_RULE = None
    if ARGS.regex:
        RE_RULE = re.compile(ARGS.regex)

    # Compile the reddit comment url regex to check whether an url is one of them.
    reddit_comment_regex = re.compile(r'.*reddit\.com\/r\/(.*?)\/comments')

    LAST = ARGS.last

    start_time = None
    ITEM = None

    sort_type = ARGS.sort_type
    if sort_type:
        sort_type = sort_type.lower()

    while not FINISHED:
        ITEMS = getitems(ARGS.reddit, multireddit=ARGS.multireddit,
                         previd=LAST, reddit_sort=sort_type)

        # Measure time and make the program wait 4 seconds between requests,
        # as per reddit api guidelines.
        end_time = time.clock()

        if start_time is not None:
            elapsed_time = end_time - start_time
            if elapsed_time <= 4:  # throttling
                time.sleep(4 - elapsed_time)

        start_time = time.clock()

        if not ITEMS:
            # No more items to process
            break

        for ITEM in ITEMS:
            TOTAL += 1

            # Do not download if the url is a reddit comment.
            if ('reddit.com/r/' + ARGS.reddit + '/comments/' in ITEM['url'] or
                    re.match(reddit_comment_regex, ITEM['url']) is not None):
                print ' Skip:[{}]'.format(ITEM['url'])
                continue

            if ITEM['score'] < ARGS.score:
                if ARGS.verbose:
                    print ' SCORE: {} has score of {} which is lower than required score of {}.'.format(
                        ITEM['id'], ITEM['score'], ARGS.score)
                SKIPPED += 1
                continue
            elif ARGS.sfw and ITEM['over_18']:
                if ARGS.verbose:
                    print ' NSFW: %s is marked as NSFW.' % (ITEM['id'])
                SKIPPED += 1
                continue
            elif ARGS.nsfw and not ITEM['over_18']:
                if ARGS.verbose:
                    print ' Not NSFW, skipping %s' % (ITEM['id'])
                SKIPPED += 1
                continue
            elif ARGS.regex and not re.match(RE_RULE, ITEM['title']):
                if ARGS.verbose:
                    print ' Regex match failed'
                SKIPPED += 1
                continue
            elif ARGS.skipAlbums and 'imgur.com/a/' in ITEM['url']:
                if ARGS.verbose:
                    print ' Album found, skipping %s' % (ITEM['id'])
                SKIPPED += 1
                continue

            if ARGS.title_contain and ARGS.title_contain.lower() not in ITEM['title'].lower():
                if ARGS.verbose:
                    print ' Title does not contain "{}", skipping {}'.format(
                        ARGS.title_contain, ITEM['id'])
                SKIPPED += 1
                continue

            FILECOUNT = 0
            try:
                URLS = extract_urls(ITEM['url'])
            except Exception:
                _log.exception("Failed to extract urls for %r", ITEM['url'])
                continue
            for URL in URLS:
                try:
                    # Find gfycat if requested
                    if URL.endswith('gif') and ARGS.mirror_gfycat:
                        check = gfycat().check(URL)
                        if check.get("urlKnown"):
                            URL = check.get('webmUrl')

                    # Trim any http query off end of file extension.
                    FILEEXT = pathsplitext(URL)[1]
                    if '?' in FILEEXT:
                        FILEEXT = FILEEXT[:FILEEXT.index('?')]

                    # Only append numbers if more than one file.
                    FILENUM = ('_%d' % FILECOUNT if len(URLS) > 1 else '')

                    # Create the filename based on the given input from the user.
                    if ARGS.filename_format == 'url':
                        FILENAME = '%s%s%s' % (pathsplitext(pathbasename(URL))[0], '', FILEEXT)
                    elif ARGS.filename_format == 'title':
                        FILENAME = '%s%s%s' % (slugify(ITEM['title']), FILENUM, FILEEXT)
                        if len(FILENAME) >= 256:
                            shortened_item_title = slugify(ITEM['title'])[:256 - len(FILENAME)]
                            FILENAME = '%s%s%s' % (shortened_item_title, FILENUM, FILEEXT)
                    else:
                        FILENAME = '%s%s%s' % (ITEM['id'], FILENUM, FILEEXT)
                    # Join the file with the directory.
                    FILEPATH = pathjoin(ARGS.dir, FILENAME)

                    # Improve debuggability: list the URL before download too.
                    # The url may be wrong, so skip it in that case.
                    if URL.encode('utf-8') == 'http://':
                        raise URLError('Url is empty')
                    else:
                        text_templ = ' Attempting to download URL[{}] as [{}].'
                        print text_templ.format(URL.encode('utf-8'), FILENAME.encode('utf-8'))

                    # Download the image
                    try:
                        download_from_url(URL, FILEPATH)
                        # Image downloaded successfully!
                        print ' Successfully downloaded URL [%s] as [%s].' % (URL, FILENAME)
                        DOWNLOADED += 1
                        FILECOUNT += 1
                    except FileExistsException, e:
                        print ' %s' % (e)
                        ERRORS += 1
                        if ARGS.update:
                            print ' Update complete, exiting.'
                            FINISHED = True
                            break
                    except Exception, e:
                        print ' %s' % str(e)
                        ERRORS += 1

                    if ARGS.num and DOWNLOADED >= ARGS.num:
                        FINISHED = True
                        break
                except WrongFileTypeException as ERROR:
                    print ' %s' % (ERROR)
                    _log_wrongtype(url=URL, target_dir=ARGS.dir,
                                   filecount=FILECOUNT, _downloaded=DOWNLOADED,
                                   filename=FILENAME)
                    SKIPPED += 1
                except FileExistsException as ERROR:
                    print ' %s' % (ERROR)
                    ERRORS += 1
                    if ARGS.update:
                        print ' Update complete, exiting.'
                        FINISHED = True
                        break
def raise_exc(*args, **kwargs): e = base.NodeError(MockRequest(), URLError("somthing bad")) e.code = 400 e.status = '400 something bad' raise e
def raise_exc(*args, **kwargs):
    raise URLError("something bad")
def __init__(self, request, timeout=None):
    raise URLError('blah')
def raise_exc(*args, **kwargs): e = base.NodeError(MockRequest(), URLError("fake 404")) e.code = 404 raise e
def raise_exc(self, node, method, path, **kwargs):
    node_request_path.append(path)
    raise base.NodeError(MockRequest(), URLError("something bad"))
print "503-error waiting the suggested %s seconds" % retry for x in range(retry): time.sleep(1) print(x + 1) else: raise IOError except AttributeError, e: print "Attribute Error (xml?) %s" % e except ParseError, e: print "ParseError %s" % e except URLError, e: raise URLError( "While opening URL: %s with parameters %s an error turned up %s" % (self.endpoint, params, e)) def buildHeader(self, header_node): """extract header information of header_node into Header object""" identifier = None datestamp = None isdeleted = False for children in header_node: if children.tag == '{' + OAI_NS + '}identifier': identifier = children.text elif children.tag == '{' + OAI_NS + '}datestamp': if re.match(r"\d\d\d\d\-\d\d\-\d\d$", children.text): children.text += "T00:00:00Z" if re.match( r"\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d(:\d\d)?(Z|[+-]\d\d:\d\d)$",
                                               rpc_server_factory=rpc_server_factory)
        retry_delay = 1
        while retry_delay <= 16:
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                if not have_appserver:
                    logging.info("Retrying in %d seconds..." % retry_delay)
                    time.sleep(retry_delay)
                    retry_delay *= 2
            else:
                break
        else:
            try:
                remote_api_stub.MaybeInvokeAuthentication()
            except HTTPError, e:
                raise URLError("%s\n"
                               "Couldn't reach remote_api handler at %s.\n"
                               "Make sure you've deployed your project and "
                               "installed a remote_api handler in app.yaml. "
                               "Note that login is only supported for "
                               "Google Accounts. Make sure you've configured "
                               "the correct authentication method in the "
                               "App Engine Dashboard." % (e, remote_url))
        logging.info("Now using the remote datastore for '%s' at %s." %
                     (connection.remote_app_id, remote_url))
        self.active_stubs = 'remote'

stub_manager = StubManager()
def get_readable_fileobj_mockreturn(filename, **kwargs):
    e = URLError('timeout')
    e.reason = socket.timeout()
    raise e
    yield True
def update(self, manuf_url=None, wfa_url=None, manuf_name=None, refresh=True):
    """Update the Wireshark OUI database to the latest version.

    Args:
        manuf_url (str): URL pointing to OUI database. Defaults to database
            located at code.wireshark.org.
        wfa_url (str): URL pointing to the WFA database that is appended to the
            OUI database. Defaults to the WFA_URL class attribute.
        manuf_name (str): Location to store the new OUI database.
            Defaults to "manuf" in the same directory.
        refresh (bool): Refresh the database once updated. Defaults to True.
            Uses database stored at manuf_name.

    Raises:
        URLError: If the download fails

    """
    if not manuf_url:
        manuf_url = self.MANUF_URL
    if not manuf_name:
        manuf_name = self._manuf_name

    # Retrieve the new database
    try:
        response = urlopen(manuf_url)
    except URLError:
        raise URLError("Failed downloading OUI database")

    # Parse the response
    if response.code == 200:
        with open(manuf_name, "wb") as write_file:
            write_file.write(response.read())
        if refresh:
            self.refresh(manuf_name)
    else:
        err = "{0} {1}".format(response.code, response.msg)
        raise URLError("Failed downloading database: {0}".format(err))

    response.close()

    if not wfa_url:
        wfa_url = self.WFA_URL

    # Append WFA to the new database
    try:
        response = urlopen(wfa_url)
    except URLError:
        raise URLError("Failed downloading WFA database")

    # Parse the response
    if response.code == 200:
        with open(manuf_name, "ab") as write_file:
            write_file.write(response.read())
        if refresh:
            self.refresh(manuf_name)
    else:
        err = "{0} {1}".format(response.code, response.msg)
        raise URLError("Failed downloading database: {0}".format(err))

    response.close()
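# A hedged usage sketch for update() above. The owning class is assumed to be a
# MAC-prefix parser exposing MANUF_URL / WFA_URL and a refresh() method, as the
# method body implies; the MacParser name and constructor argument are assumptions,
# not confirmed by the snippet.
from urllib.error import URLError   # urllib2's URLError on Python 2

parser = MacParser(manuf_name="manuf")      # hypothetical instantiation
try:
    parser.update()                         # download the OUI db, then append the WFA block
except URLError as exc:
    # update() re-raises URLError with a readable message on any failed download,
    # so callers can keep using the previously cached "manuf" file.
    print("OUI update skipped: %s" % exc)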
def test_get_geo_ip_url_error(Client_mock):
    Client_mock.get_soap_client_service.side_effect = URLError('')
    result = get_geo_ip('1.2.3.4')
    Client_mock.get_soap_client_service.assert_called_once()
    assert result is None
def _request(cls, url, post_data=None, timeout=REQUEST_TIMEOUT,
             attempts=REQUEST_ATTEMPTS):
    # change fb__explicitly_shared to fb:explicitly_shared
    if post_data:
        post_data = dict(
            (k.replace('__', ':'), v) for k, v in post_data.items())

    logger.info('requesting url %s with post data %s', url, post_data)
    post_request = (post_data is not None or 'method=post' in url)

    if post_request and facebook_settings.FACEBOOK_READ_ONLY:
        logger.info('running in readonly mode')
        response = dict(id=123456789, setting_read_only=True)
        return response

    # nicely identify ourselves before sending the request
    opener = build_opener()
    opener.addheaders = [('User-agent', 'Open Facebook Python')]

    # get the statsd path to track response times with
    path = urlparse(url).path
    statsd_path = path.replace('.', '_')

    # give it a few shots, connection is buggy at times
    timeout_mp = 0
    while attempts:
        # gradually increase the timeout upon failure
        timeout_mp += 1
        extended_timeout = timeout * timeout_mp
        response_file = None
        encoded_params = encode_params(post_data) if post_data else None
        post_string = (urlencode(encoded_params) if post_data else None)
        try:
            start_statsd('facebook.%s' % statsd_path)
            try:
                response_file = opener.open(url, post_string,
                                            timeout=extended_timeout)
                response = response_file.read().decode('utf8')
            except (HTTPError,) as e:
                response_file = e
                response = response_file.read().decode('utf8')
                # Facebook sends error codes for many of their flows;
                # we still want the json to allow for proper handling
                msg_format = 'FB request, error type %s, code %s'
                logger.warn(msg_format, type(e), getattr(e, 'code', None))
                # detect whether it's a server or an application error
                server_error = cls.is_server_error(e, response)
                if server_error:
                    # trigger a retry
                    raise URLError('Facebook is down %s' % response)
            break
        except (HTTPError, URLError, ssl.SSLError) as e:
            # These are often temporary errors, so we will retry before
            # failing
            error_format = 'Facebook encountered a timeout (%ss) or error %s'
            logger.warn(error_format, extended_timeout, unicode(e))
            attempts -= 1
            if not attempts:
                # if we have no more attempts actually raise the error
                error_instance = facebook_exceptions.convert_unreachable_exception(e)
                error_msg = 'Facebook request failed after several retries, raising error %s'
                logger.warn(error_msg, error_instance)
                raise error_instance
        finally:
            if response_file:
                response_file.close()
            stop_statsd('facebook.%s' % statsd_path)

    # Facebook's response is either
    # - valid json
    # - a string which is a querydict (a=b&c=d...etc)
    # - an html page stating FB is having trouble (but that shouldn't reach
    #   this part of the code)
    try:
        parsed_response = json.loads(response)
        logger.info('facebook send response %s' % parsed_response)
    except Exception as e:
        # using Exception because we need to support multiple json libs :S
        parsed_response = QueryDict(response, True)
        logger.info('facebook send response %s' % parsed_response)

    if parsed_response and isinstance(parsed_response, dict):
        # of course we have two different syntaxes
        if parsed_response.get('error'):
            cls.raise_error(parsed_response['error']['type'],
                            parsed_response['error']['message'],
                            parsed_response['error'].get('code'))
        elif parsed_response.get('error_code'):
            cls.raise_error(parsed_response['error_code'],
                            parsed_response['error_msg'])

    return parsed_response
def from_format(cls, format):
    format = format.lower()
    if format not in cls.VALID_FORMATS:
        raise URLError("Unsupported export format: %s!" % format)
    return cls(format, **cls.FORMAT_DICT[format])
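# A hedged usage sketch for from_format() above. The owning exporter class, its
# VALID_FORMATS set and FORMAT_DICT mapping are not shown, so the SomeExporter
# name and the "pdf" format string below are purely illustrative.
try:
    exporter = SomeExporter.from_format("PDF")   # lower-cased before the lookup
except URLError as exc:
    print("unsupported export format: %s" % exc)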
def _test_get_target_region_from_dns_format(mocker, config):
    mocker.patch('mount_efs.get_aws_ec2_metadata_token', return_value=None)
    mocker.patch('mount_efs.urlopen', side_effect=URLError('test error'))
    assert TARGET_REGION == mount_efs.get_target_region(config)
def download_daily_data(download_dir, hemisphere='N'):
    '''
    Required: <download_dir> directory to store the data files

    Optional: <hemisphere> one of 'N'/'North' or 'S'/'South', default is 'N'
    '''
    today = datetime.today()

    if hemisphere.upper() in ['SOUTH', 'S']:
        url = 'ftp://sidads.colorado.edu/DATASETS/NOAA/G02135/south/daily/data/'
    else:
        url = 'ftp://sidads.colorado.edu/DATASETS/NOAA/G02135/north/daily/data/'

    try:
        ftp_listing = urlopen(url).read().splitlines()
    except URLError:
        raise URLError("Cannot connect to NSIDC FTP url: {}".format(url))

    data_files = [f.split()[-1] for f in ftp_listing[:4] if f.endswith('.csv')]

    # climatology file (.csv)
    try:
        climatology = [url + "/" + d for d in data_files if 'climatology' in d][0]
    except IndexError:
        raise Exception(
            "Cannot locate climatology dataset at FTP: {}".format(url))

    target_climo = os.path.join(download_dir, os.path.basename(climatology))

    you_gotta_download = False
    if not os.path.isfile(target_climo):
        you_gotta_download = True
    else:
        # Re-download the file if it is >=12 hours older relative to "today"
        # at the start of script execution
        modtime = datetime.fromtimestamp(os.path.getmtime(target_climo))
        if modtime <= today - timedelta(0.5):
            you_gotta_download = True

    if you_gotta_download:  # then go get it!
        print "Retrieving {}-hemisphere climatology data from NSIDC FTP...".format(hemisphere)
        remote_data = urlopen(climatology)
        with open(target_climo, 'wb') as local_data:
            copyfileobj(remote_data, local_data)
        remote_data.close()
    else:
        print "Existing local climatology files for {}-hemisphere are fresh enough...".format(hemisphere)

    # daily data file (.csv)
    try:
        daily = [url + "/" + d for d in data_files if 'daily' in d][0]
    except IndexError:
        raise Exception("Cannot locate daily dataset at FTP: {}".format(url))

    target_daily = os.path.join(download_dir, os.path.basename(daily))

    you_gotta_download = False
    if not os.path.isfile(target_daily):
        you_gotta_download = True
    else:
        # Re-download the file if it is >=12 hours older relative to the current
        # day at the start of script execution
        modtime = datetime.fromtimestamp(os.path.getmtime(target_daily))
        if modtime <= (today - timedelta(0.5)):
            you_gotta_download = True

    if you_gotta_download:  # then download!
        print "Retrieving {}-hemisphere daily data from NSIDC FTP...".format(hemisphere)
        remote_data = urlopen(daily)
        with open(target_daily, 'wb') as local_data:
            copyfileobj(remote_data, local_data)
        remote_data.close()
    else:
        print "Existing local daily files for {}-hemisphere are fresh enough...".format(hemisphere)

    # return references to data files for later use in plotting
    return target_daily, target_climo
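# A hedged usage sketch for download_daily_data() above. The function is
# Python 2 (print statements), so the import below uses urllib2; the /tmp
# directory is an arbitrary example path, not from the original code.
from urllib2 import URLError

try:
    daily_csv, climo_csv = download_daily_data('/tmp/nsidc', hemisphere='S')
except URLError as exc:
    # Raised when the NSIDC FTP listing itself cannot be fetched.
    print 'NSIDC unreachable: %s' % exc
else:
    print 'daily data file:  %s' % daily_csv
    print 'climatology file: %s' % climo_csv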
def get_taxa_genomes_summary(self, taxa, email, output_directory, output_prefix,
                             max_ids_per_query=8000, max_download_attempts=500,
                             min_scaffold_n50=None, min_contig_n50=None,
                             max_scaffold_l50=None, max_contig_l50=None,
                             max_contig_count=None, max_scaffold_count=None,
                             max_chromosome_count=None, min_chromosome_count=None,
                             max_unlocalized_scaffolds=None, max_unplaced_scaffolds=None,
                             max_total_length=None, min_total_length=None,
                             max_ungapped_length=None, min_ungapped_length=None,
                             no_ambiguous_species=True):
    Entrez.email = email
    taxa_list = taxa if isinstance(taxa, Iterable) else [taxa]

    all_files_dir = "%s%s/" % (self.check_path(output_directory), "all")
    nonambiguous_species_all_dir = "%snonambiguous_species_all/" % self.check_path(output_directory)
    ambiguous_species_all_dir = "%s%s/" % (self.check_path(output_directory), "ambiguous_species_all")
    chromosome_lvl_dir = "%s%s/" % (self.check_path(output_directory), "chromosome_lvl")
    non_chromosome_lvl_dir = "%s%s/" % (self.check_path(output_directory), "nonchromosome_lvl")
    filtered_by_integrity_dir = "%s%s/" % (self.check_path(output_directory), "passed_integrity_filters")
    filtered_out_by_integrity_dir = "%s%s/" % (self.check_path(output_directory), "not_passed_integrity_filters")
    stat_dir = "%s%s/" % (self.check_path(output_directory), "stat")
    taxa_stat_dir = "%s%s/" % (self.check_path(output_directory), "taxa_stat")

    for subdir in (all_files_dir, chromosome_lvl_dir, non_chromosome_lvl_dir,
                   stat_dir, taxa_stat_dir, nonambiguous_species_all_dir,
                   ambiguous_species_all_dir):
        self.save_mkdir(subdir)

    filter_by_integrity = min_scaffold_n50 or min_contig_n50 or max_scaffold_l50 or max_contig_l50 \
                          or max_contig_count or max_scaffold_count or max_chromosome_count \
                          or min_chromosome_count or max_unlocalized_scaffolds \
                          or max_unplaced_scaffolds or max_total_length or min_total_length \
                          or max_ungapped_length or min_ungapped_length

    if filter_by_integrity:
        for subdir in (filtered_by_integrity_dir, filtered_out_by_integrity_dir):
            self.save_mkdir(subdir)

    for taxon in taxa_list:
        search_term = "%s[Orgn]" % taxon
        attempt_counter = 1
        while True:
            try:
                summary = Entrez.read(Entrez.esearch(db="genome", term=search_term,
                                                     retmax=10000, retmode="xml"))
                break
            except URLError:
                if attempt_counter > max_download_attempts:
                    raise URLError("Network problems. Maximum attempt number is exceeded")
                print "URLError. Retrying... Attempt %i" % attempt_counter
                attempt_counter += 1

        print "Were found %s species" % summary["Count"]
        #print summary

        taxon_stat_file = "%s/%s.stat" % (taxa_stat_dir, taxon.replace(" ", "_"))
        taxon_stat_dict = TwoLvlDict()

        for species_id in summary["IdList"]:  #[167]:
            print "Handling species id %s " % species_id

            species_stat_file = "%s/%s.stat" % (stat_dir, species_id)
            species_stat_dict = TwoLvlDict()
            species_stat_dict[species_id] = OrderedDict()
            taxon_stat_dict[species_id] = OrderedDict()

            for stat in "all", "chromosome_lvl", "non_chromosome_lvl":
                species_stat_dict[species_id][stat] = 0
                taxon_stat_dict[species_id][stat] = 0

            #species_summary = Entrez.read(Entrez.esummary(db="genome", id=species_id, retmax=10000, retmode="xml"))
            #print species_summary

            # get assemblies linked with the genome of the species
            attempt_counter = 1
            while True:
                try:
                    assembly_links = Entrez.read(Entrez.elink(dbfrom="genome", id=species_id,
                                                              retmode="xml", retmax=10000,
                                                              linkname="genome_assembly"))
                    break
                except URLError:
                    if attempt_counter > max_download_attempts:
                        raise URLError("Network problems. Maximum attempt number is exceeded")
                    print "URLError. Retrying... Attempt %i" % attempt_counter
                    attempt_counter += 1

            assembly_number = len(assembly_links)
            #print links
            #print links[0]["LinkSetDb"][0]["Link"]

            if assembly_links:
                if "LinkSetDb" in assembly_links[0]:
                    if assembly_links[0]["LinkSetDb"]:
                        if "Link" in assembly_links[0]["LinkSetDb"][0]:
                            assembly_ids = [id_dict["Id"] for id_dict in assembly_links[0]["LinkSetDb"][0]["Link"]]
                        else:
                            continue
                    else:
                        continue
                else:
                    continue
            else:
                continue

            number_of_ids = len(assembly_ids)
            print "\tFound %i assemblies" % number_of_ids

            id_group_edges = np.arange(0, number_of_ids + 1, max_ids_per_query)
            if id_group_edges[-1] != number_of_ids:
                id_group_edges = np.append(id_group_edges, number_of_ids)

            number_of_id_groups = len(id_group_edges) - 1

            #print len(assembly_links[0]["LinkSetDb"][0]["Link"])
            #print assembly_ids
            #print len(assembly_ids)
            #assembly_dict = TwoLvlDict()
            #assemblies_with_ambiguous_taxonomies = SynDict()
            #summaries = Entrez.read(Entrez.esummary(db="assembly", id=",".join(assembly_ids), retmode="xml"))

            summary_list = None
            for i in range(0, number_of_id_groups):
                print "\tDownloading summary about assemblies %i - %i" % (id_group_edges[i] + 1,
                                                                          id_group_edges[i + 1])
                #print len(assembly_ids[id_group_edges[i]:id_group_edges[i+1]])
                summaries = Entrez.read(Entrez.esummary(db="assembly",
                                                        id=",".join(assembly_ids[id_group_edges[i]:id_group_edges[i+1]]),
                                                        retmode="xml"),
                                        validate=False)
                tmp_summary_list = AssemblySummaryList(entrez_summary_biopython=summaries)
                summary_list = (summary_list + tmp_summary_list) if summary_list else tmp_summary_list
            print "\tDownloaded %i" % len(summary_list)
            if len(summary_list) != number_of_ids:
                print "\tWARNING:Not all assemblies were downloaded"
                """
                print "\tFollowing assemblies were not downloaded(ids):%s" % ",".join(set())
                """

            if summary_list:
                species_stat_dict[species_id]["all"] = len(summary_list)
                taxon_stat_dict[species_id]["all"] = len(summary_list)

                output_file = "%s%s.genome.summary" % ((output_prefix + ".") if output_prefix else "",
                                                       species_id)
                                                       #summary_list[0]['SpeciesName'].replace(" ", "_"))
                all_output_file = "%s/%s" % (all_files_dir, output_file)
                chromosome_lvl_output_file = "%s/%s" % (chromosome_lvl_dir, output_file)
                non_chromosome_lvl_output_file = "%s/%s" % (non_chromosome_lvl_dir, output_file)
                nonambiguous_species_output_file = "%s/%s" % (nonambiguous_species_all_dir, output_file)
                ambiguous_species_output_file = "%s/%s" % (ambiguous_species_all_dir, output_file)

                chromosome_lvl_summary_list, non_chromosome_lvl_summary_list = summary_list.filter_non_chrom_level_genomes()

                filtered_by_integrity_file = "%s/%s" % (filtered_by_integrity_dir, output_file)
                filtered_out_by_integrity_file = "%s/%s" % (filtered_out_by_integrity_dir, output_file)

                species_stat_dict[species_id]["chromosome_lvl"] = len(chromosome_lvl_summary_list)
                taxon_stat_dict[species_id]["chromosome_lvl"] = len(chromosome_lvl_summary_list)
                species_stat_dict[species_id]["non_chromosome_lvl"] = len(non_chromosome_lvl_summary_list)
                taxon_stat_dict[species_id]["non_chromosome_lvl"] = len(non_chromosome_lvl_summary_list)

                print("\tChromosome level assemblies %i" % species_stat_dict[species_id]["chromosome_lvl"])
                print("\tNon chromosome level assemblies %i" % species_stat_dict[species_id]["non_chromosome_lvl"])

                if chromosome_lvl_summary_list:
                    chromosome_lvl_summary_list.write(chromosome_lvl_output_file)

                if non_chromosome_lvl_summary_list:
                    non_chromosome_lvl_summary_list.write(non_chromosome_lvl_output_file)

                nonambiguous_species_summary_list, ambiguous_species_summary_list = summary_list.filter_ambiguous_species()
                #print(len(nonambiguous_species_summary_list), len(ambiguous_species_summary_list))
                species_stat_dict[species_id]["nonambiguous_species"] = len(nonambiguous_species_summary_list)
                species_stat_dict[species_id]["ambiguous_species"] = len(ambiguous_species_summary_list)

                print "\tAmbiguous species %i" % species_stat_dict[species_id]["ambiguous_species"]

                if nonambiguous_species_summary_list:
                    nonambiguous_species_summary_list.write(nonambiguous_species_output_file)

                if ambiguous_species_summary_list:
                    ambiguous_species_summary_list.write(ambiguous_species_output_file)

                summary_list.write(all_output_file)

                if filter_by_integrity:
                    filtered_by_integrity, filtered_out_by_integrity = \
                        summary_list.filter_by_integrity(min_scaffold_n50=min_scaffold_n50,
                                                         min_contig_n50=min_contig_n50,
                                                         max_scaffold_l50=max_scaffold_l50,
                                                         max_contig_l50=max_contig_l50,
                                                         max_contig_count=max_contig_count,
                                                         max_scaffold_count=max_scaffold_count,
                                                         max_chromosome_count=max_chromosome_count,
                                                         min_chromosome_count=min_chromosome_count,
                                                         max_unlocalized_scaffolds=max_unlocalized_scaffolds,
                                                         max_unplaced_scaffolds=max_unplaced_scaffolds,
                                                         max_total_length=max_total_length,
                                                         min_total_length=min_total_length,
                                                         max_ungapped_length=max_ungapped_length,
                                                         min_ungapped_length=min_ungapped_length,
                                                         no_ambiguous_species=no_ambiguous_species)
                    species_stat_dict[species_id]["filtered_by_integrity"] = len(filtered_by_integrity)
                    species_stat_dict[species_id]["filtered_out_by_integrity"] = len(filtered_out_by_integrity)

                    if filtered_by_integrity:
                        filtered_by_integrity.write(filtered_by_integrity_file)

                    if filtered_out_by_integrity:
                        filtered_out_by_integrity.write(filtered_out_by_integrity_file)

                    print "\tPassed integrity filters %i" % species_stat_dict[species_id]["filtered_by_integrity"]

            species_stat_dict.write(species_stat_file)
            print "\n\n"
        taxon_stat_dict.write(taxon_stat_file)
    """
def do_request_(self, request):
    host = request.get_host()
    if not host:
        raise URLError('no host given')

    data = request.get_data()
    v_files = []
    v_vars = []

    if request.has_data() and not isinstance(data, str):  # POST
        if hasattr(data, 'items'):
            data = data.items()
        else:
            try:
                if len(data) and not isinstance(data[0], tuple):
                    raise TypeError
            except TypeError:
                _ty, _va, tb = sys.exc_info()
                try:
                    raise TypeError, "not a valid non-string sequence or mapping object: %r" % type(data), tb
                finally:
                    del tb
        for (k, v) in data:
            if hasattr(v, 'read'):
                v_files.append((k, v))
            else:
                v_vars.append((k, v))

    boundary = mimetools.choose_boundary()
    request.boundary = boundary
    request.v_files = v_files
    request.v_vars = v_vars

    # no file? convert to string
    if len(v_vars) > 0 and len(v_files) == 0:
        request.data = data = urllib.urlencode(v_vars)
        v_files[:] = []
        v_vars[:] = []

    if request.has_data():
        if not 'Content-type' in request.headers:
            if len(v_files) > 0:
                l = send_data(v_vars, v_files, boundary)
                request.add_unredirected_header(
                    'Content-Type',
                    'multipart/form-data; boundary=%s' % boundary)
                request.add_unredirected_header('Content-length', str(l))
            else:
                request.add_unredirected_header(
                    'Content-type', 'application/x-www-form-urlencoded')
                if not 'Content-length' in request.headers:
                    request.add_unredirected_header('Content-length', '%d' % len(data))

    _scheme, sel = splittype(request.get_selector())
    sel_host, _sel_path = splithost(sel)

    if not request.has_header('Host'):
        request.add_unredirected_header('Host', sel_host or host)
    for name, value in self.parent.addheaders:
        name = name.capitalize()
        if not request.has_header(name):
            request.add_unredirected_header(name, value)

    return request
def __call__(self, url):
    package = url.split('/')[-2]
    try:
        return StringIO(json.dumps(self.results[package]))
    except KeyError:
        raise URLError('404')
def mockedConnectionTimeOut(self, request):
    raise URLError('Connection timed out')
def unknown_open(self, req):
    type = req.get_type()
    raise URLError('unknown url type: %s' % type)
def mockedGetAddrInfoFailed(self, request):
    raise URLError('Getaddrinfo failed')
def raise_exc(*args, **kwargs):
    raise base.NodeError(MockRequest(), URLError("something bad"))