def __init__(self, data_dir, train_data_url, test_data_url, columns,
             label_column, categorical_columns, continuous_columns):
    """Constructor of CensusDataSource.

    Loads the training and test tables from `data_dir`, downloading
    whichever of the two files is missing, and derives a 0/1 label column
    from the "income_bracket" column (1 when the value contains ">50K").

    Args:
      data_dir: Directory to save/load the data files
      train_data_url: URL from which the training data can be downloaded
      test_data_url: URL from which the test data can be downloaded
      columns: Columns to retrieve from the data files (A list of strings).
        Must contain "income_bracket", which the labels are computed from.
      label_column: Name of the label column
      categorical_columns: Names of the categorical columns (A list of
        strings)
      continuous_columns: Names of the continuous columns (A list of strings)
    """
    # Retrieve data from disk (if available) or download from the web.
    train_file_path = os.path.join(data_dir, "adult.data")
    if os.path.isfile(train_file_path):
        print("Loading training data from file: %s" % train_file_path)
    else:
        urllib.urlretrieve(train_data_url, train_file_path)

    test_file_path = os.path.join(data_dir, "adult.test")
    if os.path.isfile(test_file_path):
        print("Loading test data from file: %s" % test_file_path)
    else:
        urllib.urlretrieve(test_data_url, test_file_path)

    # Read the training and testing data sets into Pandas DataFrame.
    # The paths are handed to pandas (instead of file objects opened here)
    # so a freshly downloaded file is read just like a cached one and no
    # file handle is left open.
    import pandas  # pylint: disable=g-import-not-at-top
    self._df_train = pandas.read_csv(
        train_file_path, names=columns, skipinitialspace=True)
    # The first row of the test file is skipped (skiprows=1).
    self._df_test = pandas.read_csv(
        test_file_path, names=columns, skipinitialspace=True, skiprows=1)

    # Remove the NaN values in the last rows of the tables
    self._df_train = self._df_train[:-1]
    self._df_test = self._df_test[:-1]

    # Apply the threshold to get the labels.
    income_thresh = lambda x: ">50K" in x
    self._df_train[label_column] = (
        self._df_train["income_bracket"].apply(income_thresh)).astype(int)
    self._df_test[label_column] = (
        self._df_test["income_bracket"].apply(income_thresh)).astype(int)

    self.label_column = label_column
    self.categorical_columns = categorical_columns
    self.continuous_columns = continuous_columns
def __init__(self, data_dir, train_data_url, test_data_url, columns,
             label_column, categorical_columns, continuous_columns):
    """Constructor of CensusDataSource.

    Loads the training and test tables from `data_dir`, downloading
    whichever of the two files is missing, and derives a 0/1 label column
    from the "income_bracket" column (1 when the value contains ">50K").

    Args:
      data_dir: Directory to save/load the data files
      train_data_url: URL from which the training data can be downloaded
      test_data_url: URL from which the test data can be downloaded
      columns: Columns to retrieve from the data files (A list of strings).
        Must contain "income_bracket", which the labels are computed from.
      label_column: Name of the label column
      categorical_columns: Names of the categorical columns (A list of
        strings)
      continuous_columns: Names of the continuous columns (A list of strings)
    """
    # Retrieve data from disk (if available) or download from the web.
    train_file_path = os.path.join(data_dir, "adult.data")
    if os.path.isfile(train_file_path):
        print("Loading training data from file: %s" % train_file_path)
    else:
        urllib.urlretrieve(train_data_url, train_file_path)

    test_file_path = os.path.join(data_dir, "adult.test")
    if os.path.isfile(test_file_path):
        print("Loading test data from file: %s" % test_file_path)
    else:
        urllib.urlretrieve(test_data_url, test_file_path)

    # Read the training and testing data sets into Pandas DataFrame.
    # The paths are handed to pandas (instead of file objects opened here)
    # so a freshly downloaded file is read just like a cached one and no
    # file handle is left open.
    import pandas  # pylint: disable=g-import-not-at-top
    self._df_train = pandas.read_csv(
        train_file_path, names=columns, skipinitialspace=True)
    # The first row of the test file is skipped (skiprows=1).
    self._df_test = pandas.read_csv(
        test_file_path, names=columns, skipinitialspace=True, skiprows=1)

    # Remove the NaN values in the last rows of the tables
    self._df_train = self._df_train[:-1]
    self._df_test = self._df_test[:-1]

    # Apply the threshold to get the labels.
    income_thresh = lambda x: ">50K" in x
    self._df_train[label_column] = (
        self._df_train["income_bracket"].apply(income_thresh)).astype(int)
    self._df_test[label_column] = (
        self._df_test["income_bracket"].apply(income_thresh)).astype(int)

    self.label_column = label_column
    self.categorical_columns = categorical_columns
    self.continuous_columns = continuous_columns
def maybe_download(directory, filename, url):
    """Download filename from url unless it's already in directory.

    The data is first written to "<filepath>.incomplete" and renamed to its
    final name only after the transfer returns, so a file found under the
    final name is never a partially written one.

    Args:
      directory: path to the directory that will be used. It is created,
        together with any missing parent directories, when absent.
      filename: name of the file to download to (do nothing if it already
        exists).
      url: URL to download from.

    Returns:
      The path to the downloaded file.
    """
    if not tf.gfile.Exists(directory):
        tf.logging.info("Creating directory %s" % directory)
        # MakeDirs also creates missing parents (os.mkdir does not) and
        # matches the tf.gfile calls used for the other file operations.
        tf.gfile.MakeDirs(directory)

    filepath = os.path.join(directory, filename)
    if tf.gfile.Exists(filepath):
        tf.logging.info("Not downloading, file already found: %s" % filepath)
        return filepath

    tf.logging.info("Downloading %s to %s" % (url, filepath))
    inprogress_filepath = filepath + ".incomplete"
    inprogress_filepath, _ = urllib.urlretrieve(
        url, inprogress_filepath, reporthook=download_report_hook)
    # Print newline to clear the carriage return from the download progress
    print()
    tf.gfile.Rename(inprogress_filepath, filepath)
    statinfo = os.stat(filepath)
    tf.logging.info("Successfully downloaded %s, %s bytes." %
                    (filename, statinfo.st_size))
    return filepath
def download_evtracks(fehs=(-2.5, -2.0, -1.5, -1.0, -0.5, 0.0, 0.15, 0.3, 0.5),
                      afe=0., phot_system='sdss'):
    """Download the evolution-track archives that are not yet on disk.

    For every value in `fehs` one ``.tgz`` archive is fetched from
    ``http://stellar.dartmouth.edu/models/tracks/<phot_system>/`` into
    ``DATADIR/dartmouth``. Archives that already exist locally are skipped.

    Parameters
    ----------
    fehs : iterable of float
        Values encoded in the archive names as ``feh<sign><10*|feh|>``
        (presumably [Fe/H] metallicities -- confirm against the model grid).
    afe : float
        Value encoded in the archive names as ``afe<sign><10*|afe|>``.
    phot_system : str
        Photometric system; selects the remote directory and the file-name
        suffix.
    """
    import urllib

    urlbase = 'http://stellar.dartmouth.edu/models/tracks/{}/'.format(
        phot_system)

    # The target folder does not depend on feh, so prepare it once.
    folder = os.path.join(DATADIR, 'dartmouth')
    if not os.path.exists(folder):
        os.makedirs(folder)

    afe_sign = 'p' if afe >= 0 else 'm'
    for feh in fehs:
        print('Fetching evolution tracks for feh={}...'.format(feh))
        feh_sign = 'p' if feh >= 0 else 'm'
        filename = 'feh{}{:02.0f}afe{}{:01.0f}_{}.tgz'.format(
            feh_sign, abs(feh * 10), afe_sign, abs(afe * 10), phot_system)
        localfile = os.path.join(folder, filename)
        if os.path.exists(localfile):
            continue

        # Download under a temporary name and rename afterwards, so an
        # interrupted transfer is not mistaken for a complete archive on
        # the next run.
        partfile = localfile + '.part'
        urllib.urlretrieve(urlbase + filename, partfile)
        os.rename(partfile, localfile)
def maybe_download(filename, expected_bytes):
    """Fetch `filename` when it is missing locally and check its size.

    A missing file is retrieved from ``url + filename`` (``url`` is a
    module-level name). The size on disk must then equal `expected_bytes`.

    Args:
      filename: local path of the file; also appended to ``url``.
      expected_bytes: size in bytes the file must have.

    Returns:
      The file name as reported by the download, or `filename` unchanged when
      the file was already present.

    Raises:
      Exception: if the size on disk differs from `expected_bytes`.
    """
    if not os.path.exists(filename):
        filename, _ = urllib.urlretrieve(url + filename, filename)

    actual_bytes = os.stat(filename).st_size
    if actual_bytes != expected_bytes:
        # Show the size that was actually found before giving up.
        print(actual_bytes)
        raise Exception('Failed to verify ' + filename +
                        '. Can you get to it with a browser?')

    print('Found and verified', filename)
    return filename
def download_evtracks(fehs=(-2.5, -2.0, -1.5, -1.0, -0.5, 0.0, 0.15, 0.3, 0.5),
                      afe=0., phot_system='sdss'):
    """Download the evolution-track archives that are not yet on disk.

    For every value in `fehs` one ``.tgz`` archive is fetched from
    ``http://stellar.dartmouth.edu/models/tracks/<phot_system>/`` into
    ``DATADIR/dartmouth``. Archives that already exist locally are skipped.

    Parameters
    ----------
    fehs : iterable of float
        Values encoded in the archive names as ``feh<sign><10*|feh|>``
        (presumably [Fe/H] metallicities -- confirm against the model grid).
    afe : float
        Value encoded in the archive names as ``afe<sign><10*|afe|>``.
    phot_system : str
        Photometric system; selects the remote directory and the file-name
        suffix.
    """
    import urllib

    urlbase = 'http://stellar.dartmouth.edu/models/tracks/{}/'.format(
        phot_system)

    # The target folder does not depend on feh, so prepare it once.
    folder = os.path.join(DATADIR, 'dartmouth')
    if not os.path.exists(folder):
        os.makedirs(folder)

    afe_sign = 'p' if afe >= 0 else 'm'
    for feh in fehs:
        print('Fetching evolution tracks for feh={}...'.format(feh))
        feh_sign = 'p' if feh >= 0 else 'm'
        filename = 'feh{}{:02.0f}afe{}{:01.0f}_{}.tgz'.format(
            feh_sign, abs(feh * 10), afe_sign, abs(afe * 10), phot_system)
        localfile = os.path.join(folder, filename)
        if os.path.exists(localfile):
            continue

        # Download under a temporary name and rename afterwards, so an
        # interrupted transfer is not mistaken for a complete archive on
        # the next run.
        partfile = localfile + '.part'
        urllib.urlretrieve(urlbase + filename, partfile)
        os.rename(partfile, localfile)
def download_and_extract():
    """Download the archive at ``config.url`` (if needed) and unpack it.

    The archive is stored in ``config.raw_path`` under the last path
    component of the URL. The download is skipped when that file already
    exists; extraction into ``config.raw_path`` is performed on every call.
    """
    dest_directory = config.raw_path
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)

    filename = config.url.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):

        def _progress(count, block_size, total_size):
            # urlretrieve reports a non-positive total when the server
            # sends no Content-Length; no percentage can be computed then.
            if total_size <= 0:
                return
            # The last block usually overshoots the total, so cap at 100%.
            percent = min(
                float(count * block_size) / float(total_size) * 100.0, 100.0)
            sys.stdout.write('\rDownloading %s %.2f%%' % (filename, percent))
            sys.stdout.flush()

        filepath, _ = urllib.urlretrieve(config.url, filepath,
                                         reporthook=_progress)
        print('Downloaded', filename)

    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use this with archives from a trusted source.
    with tarfile.open(filepath, 'r:gz') as archive:
        archive.extractall(dest_directory)
def maybe_download_and_extract():
    """Download and extract model tar file.

    The archive named by ``DATA_URL`` is stored in ``FLAGS.model_dir`` under
    the last path component of the URL. The download is skipped when that
    file already exists; extraction into ``FLAGS.model_dir`` is performed on
    every call.
    """
    dest_directory = FLAGS.model_dir
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)

    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):

        def _progress(count, block_size, total_size):
            # urlretrieve reports a non-positive total when the server
            # sends no Content-Length; no percentage can be computed then.
            if total_size <= 0:
                return
            # The last block usually overshoots the total, so cap at 100%.
            percent = min(
                float(count * block_size) / float(total_size) * 100.0, 100.0)
            sys.stdout.write('\r>> Downloading %s %.1f%%' %
                             (filename, percent))
            sys.stdout.flush()

        filepath, _ = urllib.urlretrieve(DATA_URL, filepath, _progress)
        # Move past the carriage-return progress line.
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')

    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only use this with archives from a trusted source.
    with tarfile.open(filepath, 'r:gz') as archive:
        archive.extractall(dest_directory)
def crop_by_loc(input, img_id, r1, r2, r3, r4):
    """Crop a rectangle out of an image and save it as ``IMG-<img_id>.jpg``.

    A failure while saving is reported on stdout and does not propagate.

    Args:
      input: image to open with ``Image.open`` (a file name here).
      img_id: integer used in the output file name.
      r1, r2, r3, r4: the crop box, passed to ``Image.crop`` in this order.
    """
    im = Image.open(input)
    a = im.crop((r1, r2, r3, r4))
    print(a)
    try:
        a.save("IMG-%d.jpg" % img_id)
    except Exception as exc:  # best effort: report and carry on
        print("There is an error: %s" % exc)


with open('../../carparks.db') as db_file:
    data = json.load(db_file)
anu_car_park = data["carparks"][0]

# Build the request URL once so that the URL printed below is exactly the
# one that was fetched.
streetview_url = (
    "http://maps.googleapis.com/maps/api/streetview?size=600x300&location=" +
    anu_car_park["location"] + "&pitch=" + anu_car_park["pitch"] + "&key=" +
    data["key"])
urllib.urlretrieve(streetview_url, "1.1.jpg")
print(streetview_url, "1.1.jpg")

# Cut the four fixed regions out of the downloaded street-view image.
crop_by_loc("1.1.jpg", 1, 70, 150, 170, 250)
crop_by_loc("1.1.jpg", 2, 170, 150, 270, 250)
crop_by_loc("1.1.jpg", 3, 270, 150, 370, 250)
crop_by_loc("1.1.jpg", 4, 370, 202, 470, 302)
images = [] for img in page.findAll('img'): if 'li=' not in img['src']: images.append(img['src']) i = 0 for img in images: path = io.BytesIO(urllib.urlopen(img).read()) path.seek(0, 2) # 0 bytes from the end size = path.tell() if size < 7000: continue path = os.path.join('./images', '%s.jpg' % i) response = urllib.urlretrieve(img, path) i += 1 r = redis.StrictRedis(host='localhost', port=6379, db=0) FLAGS = None # pylint: disable=line-too-long DATA_URL = "http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz" # pylint: enable=line-too-long class NodeLookup(object): """Converts integer node ID's to human readable labels.""" def __init__(self, label_lookup_path=None, uid_lookup_path=None): if not label_lookup_path:
def get_collection_tags2(image_collection_links, threshold=0.8,
                         current_dict=None):
    """Count the high-scoring tags found in a collection of image URLs.

    Every link is downloaded to a temporary file and fed to the graph
    created by ``create_graph2()``. For each of the top
    ``num_top_predictions`` predictions whose score exceeds `threshold`
    (node id 2 is always ignored) the counter of the matching
    ``special_nodes`` label is incremented.

    Images that cannot be downloaded or classified are reported on stdout
    and skipped.

    Parameters
    ----------
    image_collection_links : list[str]
        URLs of the images. Links ending in "png" are skipped.
    threshold : float
        A tag is counted only when its score is strictly greater than this.
    current_dict : dict, optional
        Existing counter to update in place. A new ``defaultdict(float)`` is
        used when omitted.

    Returns
    -------
    dict
        Mapping of label to count; the same object as `current_dict` when
        one was given.
    """
    import os
    import tempfile
    import urllib

    ans = defaultdict(float) if current_dict is None else current_dict
    err_count = 0

    # Creates graph from saved GraphDef.
    create_graph2()

    # Scratch file reused for every image; a temporary file keeps the
    # function independent of any particular user's home directory.
    fd, filename = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        for i, img_link in enumerate(image_collection_links):
            if img_link.endswith("png"):  # skip items that are png images
                print("###########################################################")
                print("Error: picture is a png (need jpg)")
                print("###########################################################")
                continue

            # get url
            try:
                print("{}: retreiving url({})".format(i, img_link))
                urllib.urlretrieve(img_link, filename)
                print("retrieval complete!")
            except Exception:
                # `Exception` rather than a bare except so that Ctrl-C
                # still stops the run.
                print("###########################################################")
                print("Error: URL is not retreivalbe")
                print("###########################################################")
                continue

            print("now processing...")
            try:
                with tf.gfile.FastGFile(filename, 'rb') as image_file:
                    image_data = image_file.read()
                with tf.Session() as sess:
                    # 'final_result:0' is the tensor whose output is used as
                    # the per-node scores; 'DecodeJpeg/contents:0' takes the
                    # JPEG-encoded bytes of the image.
                    softmax_tensor = sess.graph.get_tensor_by_name(
                        'final_result:0')
                    predictions = sess.run(
                        softmax_tensor, {'DecodeJpeg/contents:0': image_data})
                    predictions = np.squeeze(predictions)
                    # Best-scoring node ids first.
                    top_k = predictions.argsort()[-num_top_predictions:][::-1]
                    temp_tags = [(node_id, predictions[node_id])
                                 for node_id in top_k]
            except Exception:
                print("ERROR ###################################")
                err_count += 1
                print("Could not give tags to image.")
                print("ERROR ###################################")
                continue

            # keep only the tags that overcome the certain threshold
            print("tags = [{}]".format(temp_tags))
            for node_id, score in temp_tags:
                if node_id != 2 and score > threshold:
                    ans[special_nodes[node_id]] += 1
                    print("\tadded " + special_nodes[node_id])
    finally:
        if os.path.exists(filename):
            os.remove(filename)

    print("Final Error Count:", err_count)
    return ans
def get_collection_tags2(image_collection_links, threshold=0.8,
                         current_dict=None):
    """Count the high-scoring tags found in a collection of image URLs.

    Every link is downloaded to a temporary file and fed to the graph
    created by ``create_graph2()``. For each of the top
    ``num_top_predictions`` predictions whose score exceeds `threshold`
    (node id 2 is always ignored) the counter of the matching
    ``special_nodes`` label is incremented.

    Images that cannot be downloaded or classified are reported on stdout
    and skipped.

    Parameters
    ----------
    image_collection_links : list[str]
        URLs of the images. Links ending in "png" are skipped.
    threshold : float
        A tag is counted only when its score is strictly greater than this.
    current_dict : dict, optional
        Existing counter to update in place. A new ``defaultdict(float)`` is
        used when omitted.

    Returns
    -------
    dict
        Mapping of label to count; the same object as `current_dict` when
        one was given.
    """
    import os
    import tempfile
    import urllib

    ans = defaultdict(float) if current_dict is None else current_dict
    err_count = 0

    # Creates graph from saved GraphDef.
    create_graph2()

    # Scratch file reused for every image; a temporary file keeps the
    # function independent of any particular user's home directory.
    fd, filename = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    try:
        for i, img_link in enumerate(image_collection_links):
            if img_link.endswith("png"):  # skip items that are png images
                print("###########################################################")
                print("Error: picture is a png (need jpg)")
                print("###########################################################")
                continue

            # get url
            try:
                print("{}: retreiving url({})".format(i, img_link))
                urllib.urlretrieve(img_link, filename)
                print("retrieval complete!")
            except Exception:
                # `Exception` rather than a bare except so that Ctrl-C
                # still stops the run.
                print("###########################################################")
                print("Error: URL is not retreivalbe")
                print("###########################################################")
                continue

            print("now processing...")
            try:
                with tf.gfile.FastGFile(filename, 'rb') as image_file:
                    image_data = image_file.read()
                with tf.Session() as sess:
                    # 'final_result:0' is the tensor whose output is used as
                    # the per-node scores; 'DecodeJpeg/contents:0' takes the
                    # JPEG-encoded bytes of the image.
                    softmax_tensor = sess.graph.get_tensor_by_name(
                        'final_result:0')
                    predictions = sess.run(
                        softmax_tensor, {'DecodeJpeg/contents:0': image_data})
                    predictions = np.squeeze(predictions)
                    # Best-scoring node ids first.
                    top_k = predictions.argsort()[-num_top_predictions:][::-1]
                    temp_tags = [(node_id, predictions[node_id])
                                 for node_id in top_k]
            except Exception:
                print("ERROR ###################################")
                err_count += 1
                print("Could not give tags to image.")
                print("ERROR ###################################")
                continue

            # keep only the tags that overcome the certain threshold
            print("tags = [{}]".format(temp_tags))
            for node_id, score in temp_tags:
                if node_id != 2 and score > threshold:
                    ans[special_nodes[node_id]] += 1
                    print("\tadded " + special_nodes[node_id])
    finally:
        if os.path.exists(filename):
            os.remove(filename)

    print("Final Error Count:", err_count)
    return ans
def download(url, fnp, auth=None, force=False, file_size_bytes=0,
             skipSizeCheck=None, quiet=False, umask=FileUmask):
    """Download `url` to the local path `fnp`.

    Args:
      url: source URL. "ftp://" URLs are fetched with urllib, everything
        else with requests.
      fnp: destination file path.
      auth: when truthy, use HTTP basic auth straight away. Credentials are
        read from ~/.encode.txt (first line user, second line password).
        Without `auth`, the credentials are only used after a 403 reply.
      force: re-download even when `fnp` already exists.
      file_size_bytes: expected size; 0 means "ask
        Utils.getHttpFileSizeBytes".
      skipSizeCheck: when truthy, skip the size lookup and the
        Utils.deleteFileIfSizeNotMatch call (presumably removing a stale
        `fnp` of the wrong size -- confirm in Utils).
      quiet: forwarded to Utils.quietPrint.
      umask: permission bits OR-ed into the file mode after the download.

    Returns:
      True when the file is in place (already present or downloaded), False
      when the server answered with a status other than 200.

    Raises:
      Exception: when credentials are needed but ~/.encode.txt is missing.
    """
    Utils.ensureDir(fnp)

    if not skipSizeCheck:
        if 0 == file_size_bytes:
            fsb = Utils.getHttpFileSizeBytes(url, auth)
            if fsb:
                file_size_bytes = fsb
        Utils.deleteFileIfSizeNotMatch(fnp, file_size_bytes)

    if os.path.exists(fnp):
        if not force:
            return True
        os.remove(fnp)

    Utils.quietPrint(quiet, "downloading", url, "...")

    def _add_permissions():
        # chmod g+w
        st = os.stat(fnp)
        os.chmod(fnp, st.st_mode | umask)

    if url.startswith("ftp://"):
        fnpTmp = urllib.urlretrieve(url)[0]
        shutil.move(fnpTmp, fnp)
        _add_permissions()
        return True

    r = None
    try:
        # stream=True keeps the body on the socket until it is written out
        # chunk by chunk below, so large files are never held in memory.
        if not auth:
            r = requests.get(url, stream=True)

        if auth or 403 == r.status_code:
            keyFnp = os.path.expanduser('~/.encode.txt')
            if not os.path.exists(keyFnp):
                raise Exception("no ENCODE password file found at: " + keyFnp)
            with open(keyFnp) as f:
                toks = f.read().strip().split('\n')
            if r is not None:
                # Release the connection of the rejected anonymous request.
                r.close()
            r = requests.get(url, auth=HTTPBasicAuth(toks[0], toks[1]),
                             stream=True)

        if 200 != r.status_code:
            Utils.quietPrint(quiet, "could not download", url)
            Utils.quietPrint(quiet, "status_code:", r.status_code)
            return False

        fnpTmp = None
        try:
            with tempfile.NamedTemporaryFile("wb", delete=False) as f:
                # Remember the name before writing, so the temp file is
                # cleaned up even when the transfer fails half-way.
                fnpTmp = f.name
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
            shutil.move(fnpTmp, fnp)
        finally:
            if fnpTmp and os.path.exists(fnpTmp):
                os.remove(fnpTmp)
    finally:
        if r is not None:
            r.close()

    _add_permissions()
    return True
def getlog(self):
    """Download the git change log and show the entries newer than this image.

    The log for ``self.logtype`` is fetched from the openvix feed, split into
    entries, and every entry whose build number is greater than the running
    image's build number is written to the "text", "title_summary" and
    "text_summary" widgets. If the download fails (or the log contains
    "404 Not Found") an error message is shown instead.

    Reads and clears the module-level ``ocram`` string.
    """
    global ocram
    try:
        sourcefile = 'https://www.openvix.co.uk/feeds/%s/%s/%s-git.log' % (
            getImageDistro(), getImageVersion(), self.logtype)
        # urlretrieve without a target stores the download in a temp file;
        # it is then moved to a fixed name under /tmp and read back.
        sourcefile, headers = urllib.urlretrieve(sourcefile)
        rename(sourcefile, '/tmp/' + self.logtype + '-git.log')
        fd = open('/tmp/' + self.logtype + '-git.log', 'r')
        releasenotes = fd.read()
        fd.close()
    except:
        # NOTE(review): bare except -- any failure above is treated like a
        # missing log.
        releasenotes = '404 Not Found'
    if '404 Not Found' not in releasenotes:
        # Normalise the text so that every "openvix: build" header starts a
        # new blank-line separated entry, then split into entries.
        releasenotes = releasenotes.replace('[openvix] Zeus Release.',
                                            'openvix: build 000')
        releasenotes = releasenotes.replace('\nopenvix: build',
                                            "\n\nopenvix: build")
        releasenotes = releasenotes.split('\n\n')
        ver = -1
        releasever = ""
        viewrelease = ""
        # Advance to the first entry whose header yields an all-digit token:
        # the third word of the first line when it has more than two words,
        # otherwise the first word (colons removed).
        # NOTE(review): raises IndexError if no entry qualifies.
        while not releasever.isdigit():
            ver += 1
            releasever = releasenotes[int(ver)].split('\n')
            releasever = releasever[0].split(' ')
            if len(releasever) > 2:
                releasever = releasever[2].replace(':', "")
            else:
                releasever = releasever[0].replace(':', "")
        # Build number to compare against. For the 'oe' log a build of 1 is
        # treated as 0; for other logs 905 is added (the meaning of this
        # offset is not visible here).
        if self.logtype == 'oe':
            if int(getImageBuild()) == 1:
                imagever = int(getImageBuild()) - 1
            else:
                imagever = int(getImageBuild())
        else:
            imagever = int(getImageBuild()) + 905
        # Collect entries for as long as their build number is newer than
        # the image. A pending `ocram` text is inserted after the first
        # collected entry and then cleared.
        # NOTE(review): raises IndexError if the entries run out, or if an
        # entry header has fewer than three words, before an older build is
        # reached.
        while int(releasever) > int(imagever):
            if ocram:
                viewrelease += releasenotes[int(ver)] + '\n' + ocram + '\n'
                ocram = ""
            else:
                viewrelease += releasenotes[int(ver)] + '\n\n'
            ver += 1
            releasever = releasenotes[int(ver)].split('\n')
            releasever = releasever[0].split(' ')
            releasever = releasever[2].replace(':', "")
        # Nothing newer found: fall back to the pending `ocram` text.
        if not viewrelease and ocram:
            viewrelease = ocram
            ocram = ""
        self["text"].setText(viewrelease)
        # Summary: text before the first ':\n' is the title, the part after
        # it is the body.
        summarytext = viewrelease.split(':\n')
        try:
            self['title_summary'].setText(summarytext[0] + ':')
            self['text_summary'].setText(summarytext[1])
        except:
            # No ':\n' separator (summarytext[1] missing) or a widget error:
            # show the whole text without a title.
            self['title_summary'].setText("")
            self['text_summary'].setText(viewrelease)
    else:
        self['title_summary'].setText("")
        self['text_summary'].setText(_("Error downloading change log."))
        self['text'].setText(_("Error downloading change log."))