def lookup_cik(ticker, name=None):
    """Given a ticker symbol, retrieves the CIK."""
    ticker = ticker.strip().upper()

    # First try the SEC. In theory, this should work for all known symbols,
    # even deactivated ones. In practice, it fails for many, even active ones.
    url = (
        'http://www.sec.gov/cgi-bin/browse-edgar?'
        'CIK={cik}&'
        'owner=exclude&'
        'Find=Find+Companies&'
        'action=getcompany'
    ).format(cik=ticker)
    request = urllib2.Request(url=url, headers={'User-agent': get_user_agent()})
    response = urllib2.urlopen(request)
    data = response.read()
    try:
        match = next(re.finditer(r'CIK=([0-9]+)', data))
        return match.group().split('=')[-1]
    except StopIteration:
        pass

    # Next, try the SEC's other CIK lookup form. It doesn't always work with
    # just the ticker, so we also pass in the company name; it's the next most
    # accurate after the first. Unfortunately, this search is sensitive to
    # punctuation in the company name, which we might not have stored
    # correctly, so we start searching with everything we have and then back
    # off to widen the search.
    name = (name or '').strip()
    name = ''.join(c for c in name if ord(c) < 128)
    if name:
        name_parts = name.split(' ')
        for i in six.moves.xrange(len(name_parts)):
            url = 'http://www.sec.gov/cgi-bin/cik.pl.c?company={company}'\
                .format(company='+'.join(name_parts[:-(i + 1)]))
            request = urllib2.Request(url=url,
                                      headers={'User-agent': get_user_agent()})
            response = urllib2.urlopen(request)
            data = response.read()
            matches = re.findall(r'CIK=([0-9]+)', data)
            if len(matches) == 1:
                return matches[0]

    # If the SEC search doesn't find anything, try Yahoo. This should work
    # for all active symbols, but won't work for any deactivated ones.
    url = 'http://finance.yahoo.com/q/sec?s={symbol}+SEC+Filings'.format(
        symbol=ticker)
    request = urllib2.Request(url=url, headers={'User-agent': get_user_agent()})
    response = urllib2.urlopen(request)
    data = response.read()
    try:
        match = next(re.finditer(r'search/\?cik=([0-9]+)', data))
        return match.group().split('=')[-1]
    except StopIteration:
        pass
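# The snippets above and below mix urllib, urllib2, and six.moves.urllib for
# the same job. A minimal sketch of a version-agnostic request helper,
# assuming six is available; `fetch_url` is illustrative and not part of any
# snippet's original API:
from six.moves.urllib import request as _url_request

def fetch_url(url, user_agent=None):
    # Build a Request with an optional User-Agent header and return the raw
    # body; works unchanged on Python 2 (urllib2) and Python 3.
    headers = {'User-agent': user_agent} if user_agent else {}
    req = _url_request.Request(url, headers=headers)
    resp = _url_request.urlopen(req)
    try:
        return resp.read()
    finally:
        resp.close()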
def validate_url(self, value):
    if not URLType.URL_REGEX.match(value):
        raise StopValidation(self.messages['invalid_url'])
    if self.verify_exists:
        from six.moves import urllib
        try:
            # Request/urlopen live in the `request` submodule of
            # six.moves.urllib.
            request = urllib.request.Request(value)
            urllib.request.urlopen(request)
        except Exception:
            raise StopValidation(self.messages['not_found'])
def _download_shapefiles(file_name):
    file_parts = file_name.split("/")
    file_prefix = file_parts[-1].split(".")[0]
    exts = [".shp", ".dbf", ".shx"]
    for ext in exts:
        # Rebuild the URL for each shapefile component.
        file_name = file_prefix + ext
        file_parts[-1] = file_name
        new_url = "/".join(file_parts)
        u = urllib.urlopen(new_url)
        f = open(file_name, 'wb')
        meta = u.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        # Download in fixed-size blocks, tracking progress.
        file_size_dl = 0
        block_sz = 8192
        while True:
            bf = u.read(block_sz)
            if not bf:
                break
            file_size_dl += len(bf)
            f.write(bf)
            # Progress string (built with backspaces for in-place display;
            # the original never actually prints it).
            status = r"%10d  [%3.2f%%]" % (
                file_size_dl, file_size_dl * 100. / file_size)
            status = status + chr(8) * (len(status) + 1)
        f.close()
def downloadVideo(self, _id, logs=True):
    video = self.vdata[_id]
    url = video['url']
    stime = video['start time']
    etime = video['end time']
    sfname = "%s/%d.mp4" % (self.vdir, _id)
    if os.path.exists(sfname):
        if logs:
            print("Video Id [%d] Already Downloaded" % _id)
        return sfname
    youtubeId = self.getYoutubeId(url)
    turl = "curl 'https://hesetube.com/download.php?id=%s'" % (youtubeId)
    durl = "https://hesetube.com/video/%s.mp4?start=%f&end=%f" % (
        youtubeId, stime, etime)
    print(durl)
    print(turl)
    os.system(turl)
    cont = urllib.urlopen(durl).read()
    with open(sfname, "wb") as f:
        f.write(cont)
    print("Video Id [%d] Downloaded : %s " % (_id, youtubeId))
    fs = os.path.getsize(sfname)
    if fs < VideoHandler.STHRES:
        # Downloaded file is too small to be a real video: log it, remove
        # it, and leave an empty placeholder so we don't retry endlessly.
        print("Crosscheck failed, File Size : %d" % fs)
        with open(self.logfile, "a") as f:
            f.write("Crosscheck file %d, %s with size %d\n" %
                    (_id, youtubeId, fs))
        os.remove(sfname)
        open(sfname, 'a').close()
        self.takebreak()
        return None
    else:
        self.takebreak()
        return sfname
def callback():
    # FIXME: Setting a timeout, either globally using
    # socket.setdefaulttimeout() or in urlopen(), doesn't appear to work on
    # Mac 10.5 with Python 2.7. For now we ignore the timeout value and hope
    # for the best.
    request = urllib.Request(self._url, data, {'Content-Type': content_type})
    return urllib.urlopen(request)
def check_for_software_updates(flash_message=False):
    """Check for a new release of Invenio.

    :return: True if you have the latest version, else False if you need to
        upgrade, or None if the server was not reachable.
    """
    from invenio.config import CFG_VERSION
    from invenio.base.i18n import _
    try:
        find = re.compile(r'Invenio v[0-9]+\.[0-9]+\.[0-9]+(\-rc[0-9])?'
                          r' is released')
        webFile = urllib.urlopen("http://invenio-software.org/repo"
                                 "/invenio/tree/RELEASE-NOTES")
        temp = ""
        version = ""
        version1 = ""
        while 1:
            temp = webFile.readline()
            match1 = find.match(temp)
            try:
                version = match1.group()
                break
            except AttributeError:
                # No match on this line; keep scanning.
                pass
            if not temp:
                break
        webFile.close()
        submatch = re.compile(r'[0-9]+\.[0-9]+\.[0-9]+(\-rc[0-9])?')
        version1 = submatch.search(version)
        web_version = version1.group().split(".")
        local_version = CFG_VERSION.split(".")
        if (web_version[0] > local_version[0] or
                web_version[0] == local_version[0] and
                web_version[1] > local_version[1] or
                web_version[0] == local_version[0] and
                web_version[1] == local_version[1] and
                web_version[2] > local_version[2]):
            if flash_message:
                flash(_('A newer version of Invenio is available for '
                        'download. You may want to visit %s') %
                      ('<a href="http://invenio-software.org/wiki'
                       '/Installation/Download">http://invenio-software.org'
                       '/wiki/Installation/Download</a>'), 'warning')
            return False
    except Exception as e:
        print(e)
        if flash_message:
            flash(_('Cannot download or parse release notes from http://'
                    'invenio-software.org/repo/invenio/tree/RELEASE-NOTES'),
                  'error')
        return None
    return True
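# Note: the comparison above compares version components as strings, which
# misorders multi-digit parts ('10' < '9'). A sketch of a numeric comparison
# for the same 'X.Y.Z(-rcN)?' format; `_version_tuple` is a hypothetical
# helper, not part of Invenio's API:
def _version_tuple(version):
    # Drop any '-rcN' suffix and compare the numeric components as integers.
    core = version.split('-')[0]
    return tuple(int(part) for part in core.split('.'))

# e.g. _version_tuple('2.10.0') > _version_tuple('2.9.9')  ->  True,
# whereas the string comparison '10' > '9' is False.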
def Search(self, request, context):
    query = request.query
    query = urllib.urlencode({'q': query})
    response = urllib.urlopen(
        'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query
    ).read()
    _json = json.loads(response)
    results = _json['responseData']['results']
    for result in results:
        yield result
def test_image_crop(self):
    import six.moves.urllib as urllib
    from PIL import Image
    import io
    # Image bytes need a binary buffer: io.BytesIO works on both Python 2
    # and 3 (the Py2-only StringIO.StringIO breaks on Python 3), and
    # urlopen lives in the `request` submodule of six.moves.urllib.
    file = io.BytesIO(
        urllib.request.urlopen(
            'https://avatars2.githubusercontent.com/u/5404851?s=600&v=4'
        ).read())
    img = Image.open(file)
    img = img.crop((0, 0, 400, 600))
def download_trained_weights(coco_model_path, verbose=1):
    """Download COCO trained weights from Releases.

    coco_model_path: local path of COCO trained weights
    """
    if verbose > 0:
        print("Downloading pretrained model to " + coco_model_path + " ...")
    with urllib.urlopen(COCO_MODEL_URL) as resp, \
            open(coco_model_path, 'wb') as out:
        shutil.copyfileobj(resp, out)
    if verbose > 0:
        print("... done downloading pretrained model!")
def get_record(self, id):
    """For the mathematician in this object, extract the list of advisor
    ids, the mathematician name, the mathematician institution, and the
    year of the mathematician's degree.
    """
    url = 'http://genealogy.math.ndsu.nodak.edu/id.php?id=' + str(id)
    page = urllib.urlopen(url)
    soup = BeautifulSoup(page, convertEntities='html')
    page.close()
    return get_record_from_tree(soup, id)
def call(self, url=False, data=None, method=None):
    """Send requests to 18000 server.

    Send HTTPS call, get response in JSON.
    Convert response into Python Object and return it.
    """
    opener = urllib.build_opener(urllib.HTTPCookieProcessor(self.cookie))
    urllib.install_opener(opener)
    res_json = None

    try:
        urllib.socket.setdefaulttimeout(720)
        req = urllib.Request(url, data, self.headers)
        if method:
            req.get_method = lambda: method
        res = urllib.urlopen(req).read().decode("utf-8")
        if "xx/sessions" not in url:
            LOG.info(_LI('\n\n\n\nRequest URL: %(url)s\n\n'
                         'Call Method: %(method)s\n\n'
                         'Request Data: %(data)s\n\n'
                         'Response Data:%(res)s\n\n'),
                     {'url': url, 'method': method,
                      'data': data, 'res': res})
    except Exception as err:
        LOG.error(_LE('Bad response from server: %(url)s.'
                      ' Error: %(err)s'), {'url': url, 'err': err})
        # Build the fallback error document; the original assigned a tuple
        # here instead of applying the % operator.
        json_msg = ('{"error":{"code": %s,"description": "Connect to '
                    'server error."}}' % constants.ERROR_CONNECT_TO_SERVER)
        res_json = json.loads(json_msg)
        return res_json

    try:
        res_json = json.loads(res)
    except Exception as err:
        LOG.error(_LE('JSON transfer error: %s.'), err)
        raise

    return res_json
def _uri_reader(uri):
    j = json.load(urllib.urlopen(uri))
    return j
BODY_WIDTH = options.body_width
GOOGLE_LIST_INDENT = options.list_indent

# process input
if len(args) > 0:
    file_ = args[0]
    encoding = None
    if len(args) == 2:
        encoding = args[1]
    if len(args) > 2:
        p.error('Too many arguments')

    if file_.startswith('http://') or file_.startswith('https://'):
        baseurl = file_
        j = urllib.urlopen(baseurl)
        text = j.read()
        if encoding is None:
            try:
                from feedparser import _getCharacterEncoding as enc
            except ImportError:
                enc = lambda x, y: ('utf-8', 1)
            encoding = enc(j.headers, text)[0]
            if encoding == 'us-ascii':
                encoding = 'utf-8'
        data = text.decode(encoding)
    else:
        data = open(file_, 'rb').read()
        if encoding is None:
            try:
def _load_network_resource(url):
    with contextlib.closing(urllib2.urlopen(url, timeout=60.0)) as f:
        # We fully rely on the mime type reported by the remote server b/c
        # according to the spec it MUST support the RAML mime type.
        mime_type = f.headers.gettype()
        return f.read(), mime_type
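# Note: `headers.gettype()` exists only on Python 2's mimetools.Message; on
# Python 3 the response headers are an email.message.Message exposing
# get_content_type(). A sketch of a version-neutral accessor; `_content_type`
# is a hypothetical helper, not part of the original module:
def _content_type(headers):
    # Prefer the Py2 accessor, fall back to the Py3 one.
    try:
        return headers.gettype()
    except AttributeError:
        return headers.get_content_type()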
def download(arguments):
    """Downloads and uncompresses meta data generated files from Idiap

    Parameters:

      arguments (argparse.Namespace): A set of arguments passed by the
        command-line parser

    Returns:

      int: A POSIX compliant return value of ``0`` if the download is
        successful, or ``1`` in case it is not.

    Raises:

      IOError: if metafiles exist and ``--force`` was not passed

      urllib2.HTTPError: if the target resource does not exist on the
        webserver
    """

    # What should happen as a combination of flags. Legend:
    #
    # 0 - Exit, with status 0
    # X - Download, overwrite if there
    # R - Raise exception, err
    #
    # +----------+-----------+----------+--------+
    # | complete | --missing | --force  |  none  |
    # +----------+-----------+----------+--------+
    # |   yes    |     0     |    X     |   R    |
    # +----------+-----------+----------+--------+
    # |   no     |     X     |    X     |   X    |
    # +----------+-----------+----------+--------+

    if not arguments.files:
        print("Skipping download of metadata files for bob.db.%s: no files "
              "declared" % arguments.name)

    # Check we're complete in terms of metafiles
    complete = True
    for p in arguments.files:
        if not os.path.exists(p):
            complete = False
            break

    if complete:
        if arguments.missing:
            print("Skipping download of metadata files for `bob.db.%s': "
                  "complete" % arguments.name)
            return 0
        elif arguments.force:
            print("Re-downloading metafiles for `bob.db.%s'" % arguments.name)
        else:
            raise IOError("Metadata files are already available. Remove "
                          "metadata files before attempting download or "
                          "--force")

    # if you get here, all files aren't there, unpack
    source_url = os.path.join(arguments.source, arguments.name + ".tar.bz2")
    target_dir = arguments.test_dir  # test case

    if not target_dir:  # puts files on the root of the installed package
        import pkg_resources
        try:
            target_dir = pkg_resources.resource_filename(
                'bob.db.%s' % arguments.name, '')
        except ImportError as e:
            raise ImportError(
                "The package `bob.db.%s' is not currently installed. N.B.: "
                "The database and package names **must** match. Your package "
                "should be named `bob.db.%s', if the driver name for your "
                "database is `%s'. Check." % (3 * (arguments.name,)))

    # download file from Idiap server, unpack and remove it
    import sys
    import tempfile
    import tarfile
    import pkg_resources
    from .utils import safe_tarmembers

    if sys.version_info[0] <= 2:
        import urllib2 as urllib
    else:
        import urllib.request as urllib

    print("Extracting url `%s' into `%s'" % (source_url, target_dir))
    u = urllib.urlopen(source_url)
    f = tempfile.NamedTemporaryFile(suffix=".tar.bz2")
    open(f.name, 'wb').write(u.read())
    t = tarfile.open(fileobj=f, mode='r:bz2')
    members = list(safe_tarmembers(t))
    for k, m in enumerate(members):
        print("x [%d/%d] %s" % (k + 1, len(members), m.name))
        t.extract(m, target_dir)
    t.close()
    f.close()
def __init__(self, metadata_dir, accept_empty=True):
    """Initialize reader to metadata channel directory.

    Channel parameters are read from the attributes of the top-level file
    'dmd_properties.h5' in the `metadata_dir`.

    Parameters
    ----------
    metadata_dir : string
        Path to metadata channel directory, which contains a
        'dmd_properties.h5' file and timestamped subdirectories containing
        data.

    accept_empty : bool, optional
        If True, do not raise an IOError if the 'dmd_properties.h5' file is
        empty. If False, raise an IOError in that case and delete the empty
        'dmd_properties.h5' file.

    Raises
    ------
    IOError
        If 'dmd_properties.h5' file is not found in `metadata_dir` or if
        `accept_empty` is False and the 'dmd_properties.h5' file is empty.

    """
    self._metadata_dir = metadata_dir
    if self._metadata_dir.find('http://') != -1:
        self._local = False
        # put properties file in /tmp/dmd_properties_%i.h5 % (pid)
        url = os.path.join(self._metadata_dir, 'dmd_properties.h5')
        try:
            f = urllib.urlopen(url)
        except (urllib.URLError, urllib.HTTPError):
            url = os.path.join(self._metadata_dir, 'metadata.h5')
            f = urllib.urlopen(url)
        tmp_file = os.path.join(
            '/tmp', 'dmd_properties_%i.h5' % (os.getpid()),
        )
        # Binary mode: the payload is an HDF5 file (the original opened in
        # text mode, which corrupts it on some platforms).
        fo = open(tmp_file, 'wb')
        fo.write(f.read())
        f.close()
        fo.close()
    else:
        self._local = True
        # list and match first properties file
        tmp_file = next(
            (f for f in sorted(
                glob.glob(
                    os.path.join(
                        metadata_dir, list_drf.GLOB_DMDPROPFILE,
                    ))) if re.match(list_drf.RE_DMDPROP, f)),
            None,
        )
        if tmp_file is None:
            raise IOError('dmd_properties.h5 not found')

    with h5py.File(tmp_file, 'r') as f:
        try:
            subdir_cadence = f.attrs['subdir_cadence_secs'].item()
            file_cadence = f.attrs['file_cadence_secs'].item()
        except KeyError:
            # maybe an older version with subdirectory_cadence_seconds
            # and file_cadence_seconds
            subdir_cadence = f.attrs['subdirectory_cadence_seconds'].item()
            file_cadence = f.attrs['file_cadence_seconds'].item()
        self._subdir_cadence_secs = subdir_cadence
        self._file_cadence_secs = file_cadence
        try:
            try:
                spsn = f.attrs['sample_rate_numerator'].item()
                spsd = f.attrs['sample_rate_denominator'].item()
            except KeyError:
                # maybe an older version with samples_per_second_*
                spsn = f.attrs['samples_per_second_numerator'].item()
                spsd = f.attrs['samples_per_second_denominator'].item()
        except KeyError:
            # must have an older version with samples_per_second attribute
            sps = f.attrs['samples_per_second'].item()
            spsfrac = fractions.Fraction(sps).limit_denominator()
            self._samples_per_second = numpy.longdouble(sps)
            self._sample_rate_numerator = int(spsfrac.numerator)
            self._sample_rate_denominator = int(spsfrac.denominator)
        else:
            self._sample_rate_numerator = spsn
            self._sample_rate_denominator = spsd
            # have to go to uint64 before longdouble to ensure correct
            # conversion from int
            self._samples_per_second = (
                numpy.longdouble(numpy.uint64(
                    self._sample_rate_numerator)) /
                numpy.longdouble(numpy.uint64(
                    self._sample_rate_denominator)))
        self._file_name = f.attrs['file_name']
        try:
            version = f.attrs['digital_metadata_version']
        except KeyError:
            # version is before 2.3 when attribute was added
            version = '2.0'
        self._digital_metadata_version = version
        self._check_compatible_version()
        try:
            fields_dataset = f['fields']
        except KeyError:
            if not accept_empty:
                os.remove(tmp_file)
                errstr = ('No metadata yet written to %s, removing empty'
                          ' "dmd_properties.h5"')
                raise IOError(errstr % self._metadata_dir)
            else:
                self._fields = None
                return
        self._fields = []
        for i in range(len(fields_dataset)):
            self._fields.append(fields_dataset[i]['column'])

    if not self._local:
        os.remove(tmp_file)
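# The reader above retries several attribute spellings for backward
# compatibility. A sketch of that fallback pattern factored into a helper;
# `_first_attr` is hypothetical, not part of the digital_metadata API:
def _first_attr(attrs, names):
    # Return the first attribute value found among `names`; raise KeyError
    # if none are present.
    for name in names:
        try:
            return attrs[name].item()
        except KeyError:
            continue
    raise KeyError('none of %r found' % (names,))

# e.g. subdir_cadence = _first_attr(
#     f.attrs, ('subdir_cadence_secs', 'subdirectory_cadence_seconds'))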
def fetch(url):
    return urllib.urlopen(url).read()
def scrape(file_name, number_of_images):
    f = open(file_name, 'r')
    url_file = f.read()
    url_list = url_file.split('\n')
    index = 0
    matrices = dict()
    matrices["up"] = list()
    matrices["left"] = list()
    matrices["right"] = list()
    matrices["down"] = list()
    for url in url_list:
        # Each line is tab-separated; the URL is the second field. (The
        # original rebound `url_list` here, shadowing the list being
        # iterated.)
        fields = url.split('\t')
        real_url = fields[1]
        try:
            file = cStringIO.StringIO(urllib.urlopen(real_url).read())
            img = Image.open(file)
            opencvImage = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
            width = opencvImage.shape[0]
            height = opencvImage.shape[1]
            x = int(height / 2) - 64
            y = int(width / 2) - 64
            crop_image_height = 32
            crop_image_width = 32
            # Crop a centered patch (the original used crop_image_height
            # for both axes) and convert to grayscale.
            opencv_image = opencvImage[y:y + crop_image_height,
                                       x:x + crop_image_width]
            opencv_image = cv2.cvtColor(opencv_image, cv2.COLOR_BGR2GRAY)

            image_utils.saveImage("up.jpg", opencv_image)
            for up_row in opencv_image:
                matrices["up"].append(up_row)

            left_image = image_utils.rotate(opencv_image, 90)
            image_utils.saveImage("left.jpg", left_image)
            for left_row in left_image:
                matrices["left"].append(left_row)

            right_image = image_utils.rotate(opencv_image, 270)
            image_utils.saveImage("right.jpg", right_image)
            for right_row in right_image:
                matrices["right"].append(right_row)

            down_image = image_utils.rotate(opencv_image, 180)
            image_utils.saveImage("down.jpg", down_image)
            for down_row in down_image:
                matrices["down"].append(down_row)

            # last_row = list()
            # for _ in range(0, 31):
            #     last_row.append(-1)
            # matrices["up"].append(last_row)
            # matrices["left"].append(last_row)
            # matrices["right"].append(last_row)
            # matrices["down"].append(last_row)

            print(index)
            index += 1
            if index >= number_of_images:
                break
        except Exception:
            continue
    return matrices
def auto_download(self, dataDir, dataType, dataYear):
    """Download the COCO dataset/annotations if requested.

    dataDir: The root directory of the COCO dataset.
    dataType: What to load (train, val, minival, valminusminival)
    dataYear: What dataset year to load (2014, 2017) as a string, not an
        integer

    Note:
        For 2014, use "train", "val", "minival", or "valminusminival"
        For 2017, only "train" and "val" annotations are available
    """
    # Setup paths and file names
    if dataType == "minival" or dataType == "valminusminival":
        imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
        imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
        imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(
            "val", dataYear)
    else:
        imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
        imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
        imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(
            dataType, dataYear)
    # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)

    # Create main folder if it doesn't exist yet
    if not os.path.exists(dataDir):
        os.makedirs(dataDir)

    # Download images if not available locally
    if not os.path.exists(imgDir):
        os.makedirs(imgDir)
        print("> Downloading images to " + imgZipFile + " ...")
        with urllib.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
            shutil.copyfileobj(resp, out)
        print("... done downloading.")
        print("> Unzipping " + imgZipFile)
        with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
            zip_ref.extractall(dataDir)
        print("... done unzipping")
    print("> Will use images in " + imgDir)

    # Setup annotations data paths
    annDir = "{}/annotations".format(dataDir)
    if dataType == "minival":
        annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
        annFile = "{}/instances_minival2014.json".format(annDir)
        annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
        unZipDir = annDir
    elif dataType == "valminusminival":
        annZipFile = "{}/instances_valminusminival2014.json.zip".format(
            dataDir)
        annFile = "{}/instances_valminusminival2014.json".format(annDir)
        annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
        unZipDir = annDir
    else:
        annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
        annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
        annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(
            dataYear)
        unZipDir = dataDir
    # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)

    # Download annotations if not available locally
    if not os.path.exists(annDir):
        os.makedirs(annDir)
    if not os.path.exists(annFile):
        if not os.path.exists(annZipFile):
            print("> Downloading zipped annotations to " + annZipFile + " ...")
            with urllib.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
                shutil.copyfileobj(resp, out)
            print("... done downloading.")
        print("> Unzipping " + annZipFile)
        with zipfile.ZipFile(annZipFile, "r") as zip_ref:
            zip_ref.extractall(unZipDir)
        print("... done unzipping")
    print("> Will use annotations in " + annFile)
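# auto_download() above repeats the same download-and-unzip sequence for
# images and annotations. A sketch of that step factored into a helper,
# assuming the same os/shutil/zipfile/urllib imports; `_fetch_and_unzip` is
# hypothetical, not part of the original class:
def _fetch_and_unzip(url, zip_path, extract_dir):
    # Download `url` to `zip_path` if not already present, then extract the
    # archive into `extract_dir`.
    if not os.path.exists(zip_path):
        with urllib.urlopen(url) as resp, open(zip_path, 'wb') as out:
            shutil.copyfileobj(resp, out)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_dir)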
file_path = os.path.join(folder, the_file)
try:
    if os.path.isfile(file_path):
        os.unlink(file_path)
except Exception as e:
    print(e)

page = BeautifulSoup(open("food.html"))
images = []
for img in page.findAll('img'):
    if 'li=' not in img['src']:
        images.append(img['src'])

i = 0
for img in images:
    path = io.BytesIO(urllib.urlopen(img).read())
    path.seek(0, 2)  # 0 bytes from the end
    size = path.tell()
    if size < 7000:
        continue
    path = os.path.join('./images', '%s.jpg' % i)
    response = urllib.urlretrieve(img, path)
    i += 1

r = redis.StrictRedis(host='localhost', port=6379, db=0)

FLAGS = None

# pylint: disable=line-too-long
topics = ["Table of Contents", "You Will Learn", " Course Syllabus", "Learning Objectives", "Learn How To:"] coursetypes = soup.find_all('div', class_="clearfix block block-views") for coursetype in coursetypes: coursetype = coursetype.find('h2').contents[0] ins = coursetype.find_next('tbody') trall = ins.find_all('tr') for tr in trall: temp = tr.find_next('td') ct = temp.find_next('a') ctcontents = ct.contents[0] url2 = prefix+ct['href'] try: html2= urllib.request.urlopen(url2) except AttributeError: html2 = urllib.urlopen(url) soup2 = bs4.BeautifulSoup(html2, "html.parser") instructor = ct.find_next('td').contents[0] platform = instructor.find_next('td').contents[0] startdate = platform.find_next('span').contents[0] regfees = startdate.find_next('td').contents[0] status = regfees.find_next('td').contents[0] dept = status.find_next('a').contents[0] platform = platform.lstrip().rstrip() status = status.lstrip().rstrip() print(ctcontents,"\n") file.write('Type of course: ') file.write(coursetype) file.write("\nCourse Title: ") file.write(ctcontents.encode('utf-8'))
                #index = cl2.index(content) + 1
                hyip.addPayment_method('Bankwire')
            else:
                six.print_('payment methods parsed')
                six.print_(small2)
                six.print_(cl2)
                six.print_(tabl0)
            output(hyip)
    elif method == 'mechanize':
        six.print_('Not supported yet! Use native or dont define @method at all')
    elif method == 'urllib2':
        six.print_('Not supported yet! Use native or dont define @method at all')
        exit(1)

    # Fetch and parse the goldpoll listing. (`urllib.request` is a module;
    # the Request class must be called, as the inline comments indicate.)
    req = urllib.request.Request(goldpoll_url)  # urllib2.Request(goldpoll_url)
    response = urllib.request.urlopen(req)  # urllib2.urlopen(req)
    the_page = response.read()
    webpage = the_page.decode("ISO-8859-1")
    parser = etree.HTMLParser()
    tree = etree.fromstring(webpage, parser)
    elements_c10 = tree.xpath('//table[@class="cl0"]')
    six.print_(elements_c10)

if 'popularhyip' in sites:
    if method == 'static':
        dir_separator = ('\\' if is_windows() else '/')
        doc = html.parse('input' + dir_separator + geckoname)
        #print etree.tostring(doc)
        elements_status1 = doc.xpath('//tr[@class="status1" and (not(@id))]')
        six.print_(len(elements_status1))
        for element in elements_status1:
            six.print_('')